Anklang 0.3.0-460-gc4ef46ba
ASE — Anklang Sound Engine (C++)

« « « Anklang Documentation
Loading...
Searching...
No Matches
benchmarks.cc
Go to the documentation of this file.
1 // This Source Code Form is licensed MPL-2.0: http://mozilla.org/MPL/2.0
2#include "testing.hh"
3#include "unicode.hh"
4#include "memory.hh"
5#include "loft.hh"
6#include "internal.hh"
7#include <cmath>
8
9#include <glib.h>
10
/// Number of repetitions executed inside each timed benchmark lambda.
static constexpr size_t RUNS { 1 };
/// Value handed to the Ase::Test::Timer constructor (presumably the time budget in seconds - confirm in testing.hh).
static constexpr double MAXTIME { 0.15 };
/// Divisor that scales a per-second rate into millions, as used for the "MChar/s" figures.
static constexpr double M { 1e6 };
14
15// == Unicode Tests ==
16static std::string
17all_codepoints_to_utf8 ()
18{
19 std::vector<uint32_t> codepoints;
20 for (size_t i = 1; i <= Ase::unicode_last_codepoint; i++)
22 codepoints.push_back (i);
23 return Ase::utf8encode (codepoints);
24}
25
26TEST_BENCHMARK (utf8_codepoint_bench);
27static void
28utf8_codepoint_bench()
29{
30 std::string big = all_codepoints_to_utf8();
31 const size_t expected = Ase::utf8len (big.c_str());
32 double bench_time;
33 Ase::Test::Timer timer (MAXTIME);
34
35 auto loop_g_utf8_to_ucs4_fast = [&] () {
36 for (size_t j = 0; j < RUNS; j++)
37 {
38 glong long_result;
39 gunichar *gu = g_utf8_to_ucs4_fast (big.c_str(), -1, &long_result);
40 gulong result = long_result;
41 TCMP (expected, ==, result);
42 g_free (gu);
43 }
44 };
45 bench_time = timer.benchmark (loop_g_utf8_to_ucs4_fast);
46 Ase::printerr (" BENCH g_utf8_to_ucs4_fast: %11.1f MChar/s\n", big.size() * RUNS / bench_time / M);
47
48 auto loop_ase_utf8_to_unicode = [&] () {
49 for (size_t j = 0; j < RUNS; j++)
50 {
51 std::vector<uint32_t> codepoints;
52 codepoints.resize (expected); // force reallocation to be comparable with g_utf8_to_ucs4_fast
53 size_t result = Ase::utf8_to_unicode (big.c_str(), codepoints.data());
54 TCMP (expected, ==, result);
55 }
56 };
57 bench_time = timer.benchmark (loop_ase_utf8_to_unicode);
58 Ase::printerr (" BENCH Ase::utf8_to_unicode: %11.1f MChar/s\n", big.size() * RUNS / bench_time / M);
59
60 std::vector<uint32_t> codepoints;
61 codepoints.resize (expected);
62 auto loop_inplace_utf8_to_unicode = [&] () {
63 for (size_t j = 0; j < RUNS; j++)
64 {
65 size_t result = Ase::utf8_to_unicode (big.c_str(), codepoints.data());
66 TCMP (expected, ==, result);
67 }
68 };
69 bench_time = timer.benchmark (loop_inplace_utf8_to_unicode);
70 Ase::printerr (" BENCH utf8_to_unicode inplace: %11.1f MChar/s\n", big.size() * RUNS / bench_time / M);
71
72 glong gresult;
73 gunichar *gu = g_utf8_to_ucs4_fast (big.c_str(), -1, &gresult);
74 TASSERT (expected == (size_t) gresult);
75 for (size_t i = 0; i < expected; i++)
76 if (gu[i] != codepoints[i])
77 {
78 Ase::printerr (" BENCH 0x%06x) 0x%06x != 0x%06x\n", i + 1, gu[i], codepoints[i]);
79 TCMP (gu[i], ==, codepoints[i]);
80 }
81 g_free (gu);
82}
83
/** Count characters in a UTF-8 string by counting all non-continuation bytes.
 * A byte is a continuation byte if its top two bits are `10`, i.e. if
 * `(byte & 0xC0) == 0x80`; every other byte is counted as one character.
 * No validation of the byte sequence is performed.
 * @param str  The bytes to inspect.
 * @return     Number of bytes in `str` that are not continuation bytes.
 */
static size_t
not_0x80_strlen_utf8 (const std::string &str)
{
  size_t n_chars = 0;
  for (size_t pos = 0; pos < str.size(); pos++)
    {
      const unsigned char byte = str[pos];
      if ((byte & 0xC0) != 0x80)
        n_chars++;
    }
  return n_chars;
}
92
93static void
94utf8_strlen_bench (const std::string &str, const std::string &what)
95{
96 const size_t expected = Ase::utf8len (str.c_str());
97 double bench_time;
98 Ase::Test::Timer timer (MAXTIME);
99
100 auto glib_utf8len_loop = [&] () {
101 for (size_t j = 0; j < RUNS; j++)
102 {
103 size_t result = g_utf8_strlen (str.c_str(), -1);
104 TCMP (expected, ==, result);
105 }
106 };
107 bench_time = timer.benchmark (glib_utf8len_loop);
108 Ase::printerr (" BENCH g_utf8_strlen: %11.1f MChar/s %s\n", str.size() * RUNS / bench_time / M, what);
109
110 auto ase_utf8len_s_loop = [&] () {
111 for (size_t j = 0; j < RUNS; j++)
112 {
113 size_t result = Ase::utf8len (str); // pick utf8len (std::string&)
114 TCMP (expected, ==, result);
115 }
116 };
117 bench_time = timer.benchmark (ase_utf8len_s_loop);
118 Ase::printerr (" BENCH Ase::utf8len(string&): %11.1f MChar/s %s\n", str.size() * RUNS / bench_time / M, what);
119
120 auto ase_utf8len_c_loop = [&] () {
121 for (size_t j = 0; j < RUNS; j++)
122 {
123 size_t result = Ase::utf8len (str.c_str()); // pick utf8len(const char*)
124 TCMP (expected, ==, result);
125 }
126 };
127 bench_time = timer.benchmark (ase_utf8len_c_loop);
128 Ase::printerr (" BENCH Ase::utf8len(char*): %11.1f MChar/s %s\n", str.size() * RUNS / bench_time / M, what);
129
130 auto simple_utf8len_loop = [&] () {
131 for (size_t j = 0; j < RUNS; j++)
132 {
133 size_t result = not_0x80_strlen_utf8 (str);
134 TCMP (expected, ==, result);
135 }
136 };
137 bench_time = timer.benchmark (simple_utf8len_loop);
138 Ase::printerr (" BENCH not_0x80_strlen_utf8: %11.1f MChar/s %s\n", str.size() * RUNS / bench_time / M, what);
139}
140
141TEST_BENCHMARK (utf8_strlen_bench_high_planes);
142static void
143utf8_strlen_bench_high_planes()
144{
145 std::string big = all_codepoints_to_utf8();
146 utf8_strlen_bench (big, "(high planes)");
147}
148
149TEST_BENCHMARK (utf8_strlen_bench_ascii);
150static void
151utf8_strlen_bench_ascii()
152{
153 std::string big;
154 big.resize (Ase::unicode_last_codepoint * 1.07); // roughly equivalent length to the high_planes test
155 for (size_t i = 0; i < big.size(); i++)
156 big[i] = 1 + i % 0x7F; // fill string with 0x01..0xf7 characters
157 utf8_strlen_bench (big, "(ascii)");
158}
159
160// == Allocator Tests ==
161namespace { // Anon
162using namespace Ase;
163
164#define TEST_AREA_SIZE (16 * 1024 * 1024)
165
166static FastMemory::Arena fast_memory_arena { TEST_AREA_SIZE };
167
168static void
169ensure_block_allocator_initialization()
170{
171 fast_mem_free (fast_mem_alloc (1024));
172 const size_t areasize = 4 * 1024 * 1024;
173 TASSERT (fast_memory_arena.reserved() >= areasize);
174 FastMemory::Block b1 = fast_memory_arena.allocate (areasize / 4);
175 FastMemory::Block b2 = fast_memory_arena.allocate (areasize / 4);
176 FastMemory::Block b3 = fast_memory_arena.allocate (areasize / 4);
177 FastMemory::Block b4 = fast_memory_arena.allocate (areasize / 4);
178 fast_memory_arena.release (b1);
179 fast_memory_arena.release (b2);
180 fast_memory_arena.release (b3);
181 fast_memory_arena.release (b4);
182}
183
/// Identifies the allocation backend that a TestAllocator specialization wraps.
enum class AllocatorType {
  FastMemoryArea = 1,
  FastMemAlloc   = 2,
  LoftAlloc      = 3,
  PosixMemalign  = 4,
  LibcCalloc     = 5,
};

/// Declared only; every backend supplies an explicit specialization
/// that provides name(), allocate_block() and release_block().
template<AllocatorType BACKEND> struct TestAllocator;
193
194template<>
195struct TestAllocator<AllocatorType::FastMemoryArea> {
196 static std::string name () { return "Ase::FastMemoryArea"; }
197 static FastMemory::Block allocate_block (uint32 length)
198 { return fast_memory_arena.allocate (length); }
199 static void release_block (FastMemory::Block block)
200 { fast_memory_arena.release (block); }
201};
202
203template<>
204struct TestAllocator<AllocatorType::PosixMemalign> {
205 static std::string name () { return "posix_memalign"; }
206 static FastMemory::Block
207 allocate_block (uint32 length)
208 {
209 void *ptr = nullptr;
210 const int posix_memalign_result = posix_memalign (&ptr, FastMemory::cache_line_size, length);
211 TASSERT (posix_memalign_result == 0);
212 memset (ptr, 0, length); // match calloc() semantics
213 return { ptr, length };
214 }
215 static void
216 release_block (FastMemory::Block block)
217 {
218 memset (block.block_start, 0, block.block_length); // match release_aligned_block() timing
219 free (block.block_start);
220 }
221};
222
223template<>
224struct TestAllocator<AllocatorType::FastMemAlloc> {
225 static std::string name () { return "fast_mem_alloc"; }
226 static FastMemory::Block
227 allocate_block (uint32 length)
228 {
229 void *ptr = fast_mem_alloc (length);
230 TASSERT (ptr != nullptr);
231 return { ptr, length };
232 }
233 static void
234 release_block (FastMemory::Block block)
235 {
236 fast_mem_free (block.block_start);
237 }
238};
239
240template<>
241struct TestAllocator<AllocatorType::LoftAlloc> {
242 static std::string name () { return "loft_calloc (cacheline)"; }
243 static FastMemory::Block
244 allocate_block (uint32 length)
245 {
246 LoftPtr<void> lptr = loft_calloc (length, 1);
247 TASSERT (lptr.get() != nullptr);
248 size_t size = lptr.get_deleter().size;
249 return { lptr.release(), uint32_t (size) };
250 }
251 static void
252 release_block (FastMemory::Block block)
253 {
254 LoftPtr<void> lptr (block.block_start, { block.block_length });
255 }
256};
257
258template<>
259struct TestAllocator<AllocatorType::LibcCalloc> {
260 static std::string name () { return "::calloc (misaligned)"; }
261 static FastMemory::Block
262 allocate_block (uint32 length)
263 {
264 void *ptr = calloc (length, 1);
265 TASSERT (ptr != nullptr);
266 return { ptr, length };
267 }
268 static void
269 release_block (FastMemory::Block block)
270 {
271 free (block.block_start);
272 }
273};
274
275/* Use a simple, fast dedicated RNG, because:
276 * a) we need to be able to reset the RNG to compare results from different runs;
277 * b) it should be really fast to not affect the allocator benchmarking.
278 */
279static uint32 quick_rand32_seed = 2147483563;
280static inline uint32
281quick_rand32 ()
282{
283 quick_rand32_seed = 1664525 * quick_rand32_seed + 1013904223;
284 return quick_rand32_seed;
285}
286
287template<AllocatorType AT> static void
288ase_aligned_allocator_benchloop (uint32 seed)
289{
290 constexpr const size_t ARUNS = 3;
291 constexpr const int64 MAX_CHUNK_SIZE = 3 * 1024;
292 constexpr const int64 N_ALLOCS = 2048;
293 constexpr const int64 RESIDENT = N_ALLOCS / 3;
294 static_assert (MAX_CHUNK_SIZE * N_ALLOCS <= TEST_AREA_SIZE);
295 double accu = 0;
296 static FastMemory::Block blocks[N_ALLOCS];
297 auto loop_aa = [&] () {
298 quick_rand32_seed = seed;
299 for (size_t r = 0; r < ARUNS; r++)
300 {
301 // allocate random sizes
302 for (size_t i = 0; i < N_ALLOCS; i++)
303 {
304 const uint32 rnd = quick_rand32();
305 const size_t length = MAX (8, (rnd * MAX_CHUNK_SIZE) >> 32);
306 blocks[i] = TestAllocator<AT>::allocate_block (length);
307 accu += *(double*) blocks[i].block_start;
308 TASSERT (blocks[i].block_length > 0);
309 if (i > RESIDENT && (i & 1))
310 {
311 FastMemory::Block &rblock = blocks[i - RESIDENT];
312 // Ase::printerr ("%d) FastMemoryBlock{%u,%x,%x,%p}\n", i, rblock.shm_id, rblock.mem_offset, rblock.mem_length, rblock.mem_start);
313 TestAllocator<AT>::release_block (rblock);
314 rblock = {};
315 }
316 }
317 // shuffle some blocks
318 for (size_t j = 0; j < N_ALLOCS / 2; j++)
319 {
320 const uint i1 = j * 2;
321 const uint i2 = (quick_rand32() * N_ALLOCS) >> 32;
322 const uint i3 = (i1 + i2) / 2;
323 if (i1 == i2 || i2 == i3 || i3 == i1)
324 continue; // avoid double free
325 const uint l1 = blocks[i1].block_length;
326 const uint l2 = blocks[i2].block_length;
327 const uint l3 = blocks[i3].block_length;
328 if (l1)
329 TestAllocator<AT>::release_block (blocks[i1]);
330 if (l2)
331 TestAllocator<AT>::release_block (blocks[i2]);
332 if (l3)
333 TestAllocator<AT>::release_block (blocks[i3]);
334 blocks[i2] = TestAllocator<AT>::allocate_block (l1 ? l1 : MAX_CHUNK_SIZE / 3);
335 blocks[i1] = TestAllocator<AT>::allocate_block (l3 ? l3 : MAX_CHUNK_SIZE / 3);
336 blocks[i3] = TestAllocator<AT>::allocate_block (l2 ? l2 : MAX_CHUNK_SIZE / 3);
337 accu += *(double*) blocks[i2].block_start;
338 accu += *(double*) blocks[i1].block_start;
339 accu += *(double*) blocks[i3].block_start;
340 }
341 // release blocks randomized (frees ca 59%)
342 for (size_t j = 0; j < N_ALLOCS; j++)
343 {
344 const uint i = (quick_rand32() * N_ALLOCS) >> 32;
345 if (!blocks[i].block_length)
346 continue;
347 TestAllocator<AT>::release_block (blocks[i]);
348 blocks[i] = {};
349 }
350 // release everything
351 for (size_t i = 0; i < N_ALLOCS; i++)
352 if (blocks[i].block_length)
353 {
354 TestAllocator<AT>::release_block (blocks[i]);
355 blocks[i] = {};
356 }
357 }
358 };
359 Ase::Test::Timer timer (0.1);
360 const double bench_aa = timer.benchmark (loop_aa);
361 const size_t n_allocations = ARUNS * N_ALLOCS * (1 + 3.0 / 2);
362 const double ns_p_a = 1000000000.0 * bench_aa / n_allocations;
363 Ase::printerr (" BENCH %-25s %u allocations in %.1f msecs, %.1fnsecs/allocation\n",
364 TestAllocator<AT>::name() + ":", n_allocations, 1000 * bench_aa, ns_p_a);
365 TASSERT (accu == 0);
366}
367
368TEST_BENCHMARK (zbench_aligned_allocator_aligned_block);
369static void
370zbench_aligned_allocator_aligned_block()
371{
372 ensure_block_allocator_initialization();
373 ase_aligned_allocator_benchloop<AllocatorType::FastMemoryArea> (2654435769);
374}
375
376TEST_BENCHMARK (zbench_aligned_allocator_memalign);
377static void
378zbench_aligned_allocator_memalign()
379{
380 ensure_block_allocator_initialization();
381 ase_aligned_allocator_benchloop<AllocatorType::PosixMemalign> (2654435769);
382 // phi = 1.61803398874989484820458683436563811772030917980576286213544862270526046281890244970720720418939113748475
383 // 2^64 / phi = 11400714819323198485.95161058762180694985
384 // 2^32 / phi = 2654435769.49723029647758477079
385}
386
387TEST_BENCHMARK (zbench_aligned_allocator_calloc);
388static void
389zbench_aligned_allocator_calloc()
390{
391 ensure_block_allocator_initialization();
392 ase_aligned_allocator_benchloop<AllocatorType::LibcCalloc> (2654435769);
393}
394
395TEST_BENCHMARK (zbench_aligned_allocator_fast_mem_alloc);
396static void
397zbench_aligned_allocator_fast_mem_alloc()
398{
399 ensure_block_allocator_initialization();
400 ase_aligned_allocator_benchloop<AllocatorType::FastMemAlloc> (2654435769);
401}
402
403TEST_BENCHMARK (zbench_aligned_allocator_loft_alloc);
404static void
405zbench_aligned_allocator_loft_alloc()
406{
407 ensure_block_allocator_initialization();
408 ase_aligned_allocator_benchloop<AllocatorType::LoftAlloc> (2654435769);
409}
410
411} // Anon
T c_str(T... args)
calloc
free
T get_deleter(T... args)
T get(T... args)
#define TEST_BENCHMARK(FUNC)
Register func as a benchmark test.
Definition internal.hh:81
#define MAX(a, b)
Yield maximum of a and b.
Definition internal.hh:52
memset
constexpr size_t cache_line_size
Minimum alignment >= cache line size, see getconf LEVEL1_DCACHE_LINESIZE.
Definition memory.hh:13
The Anklang C++ API namespace.
Definition api.hh:9
std::string utf8encode(const uint32_t *codepoints, size_t n_codepoints)
Convert codepoints into an UTF-8 string, using the shortest possible encoding.
Definition unicode.cc:249
size_t utf8len(const char *str)
Count valid UTF-8 sequences, invalid sequences are counted as Latin-1 characters.
Definition unicode.cc:184
int64_t int64
A 64-bit signed integer, use PRI*64 in format strings.
Definition cxxaux.hh:29
size_t utf8_to_unicode(const char *str, uint32_t *codepoints)
Definition unicode.cc:221
constexpr bool unicode_is_assigned(uint32_t u)
Return whether u matches any of the assigned Unicode planes.
Definition unicode.hh:33
uint32_t uint32
A 32-bit unsigned integer.
Definition cxxaux.hh:24
uint32_t uint
Provide 'uint' as convenience type.
Definition cxxaux.hh:18
posix_memalign
T release(T... args)
T resize(T... args)
T size(T... args)
typedef uint32_t
Memory area (over-)aligned to cache size and utilizing huge pages.
Definition memory.hh:100
Reference for an allocated memory block.
Definition memory.hh:90
#define TASSERT(cond)
Unconditional test assertion, enters breakpoint if not fulfilled.
Definition testing.hh:24
#define TCMP(a, cmp, b)
Compare a and b according to operator cmp, verbose on failure.
Definition testing.hh:23