// Benchmark tuning constants.
// RUNS: iteration count of the `j < RUNS` loops inside each benchmark lambda; it is also
// a factor in the throughput figures printed after each benchmark.
11static constexpr size_t RUNS = 1;
// MAXTIME: not referenced in the lines visible here.
// NOTE(review): presumably the time budget handed to the benchmark timer — confirm.
12static constexpr double MAXTIME = 0.15;
// M: divisor that scales the printed rates to units of one million ("MChar/s").
13static constexpr double M = 1000000;
// Collects code points from the range 1 .. Ase::unicode_last_codepoint (inclusive) into
// `codepoints`.
// NOTE(review): the return type, the declaration of `codepoints` and the UTF-8 encoding
// step suggested by the name are on lines missing from this excerpt; line 21 is missing
// too and may make the push_back conditional — confirm against the full file.
17all_codepoints_to_utf8 ()
// The range starts at 1, so code point 0 is never added.
20 for (
size_t i = 1; i <= Ase::unicode_last_codepoint; i++)
22 codepoints.push_back (i);
// Benchmark lambda: decodes all of `big` with GLib's g_utf8_to_ucs4_fast(), RUNS times per
// invocation. The length argument -1 tells GLib the input is NUL-terminated; the number of
// decoded characters comes back through `long_result` and is compared with `expected`.
// NOTE(review): `gu` is a buffer allocated by GLib; its release is presumably on one of
// the lines missing from this excerpt — confirm.
35 auto loop_g_utf8_to_ucs4_fast = [&] () {
36 for (
size_t j = 0; j < RUNS; j++)
39 gunichar *gu = g_utf8_to_ucs4_fast (big.
c_str(), -1, &long_result);
40 gulong result = long_result;
41 TCMP (expected, ==, result);
// Time the lambda, then print big.size() * RUNS per unit of bench_time, scaled down by M.
// NOTE(review): the label says MChar/s, but big.size() is a byte count if `big` is a
// std::string — confirm which unit is intended.
45 bench_time = timer.benchmark (loop_g_utf8_to_ucs4_fast);
46 Ase::printerr (
" BENCH g_utf8_to_ucs4_fast: %11.1f MChar/s\n", big.
size() * RUNS / bench_time / M);
// Benchmark lambda for the "Ase::utf8_to_unicode" measurement. Every run resizes
// `codepoints` to `expected` inside the loop, so the resize is part of the timed work.
// NOTE(review): the conversion call that produces `result` (line 53) is missing from this
// excerpt.
48 auto loop_ase_utf8_to_unicode = [&] () {
49 for (
size_t j = 0; j < RUNS; j++)
52 codepoints.resize (expected);
54 TCMP (expected, ==, result);
// Time the lambda and print the rate computed from big.size() * RUNS, scaled down by M.
57 bench_time = timer.benchmark (loop_ase_utf8_to_unicode);
58 Ase::printerr (
" BENCH Ase::utf8_to_unicode: %11.1f MChar/s\n", big.
size() * RUNS / bench_time / M);
// "inplace" measurement: `codepoints` is sized once here, before the lambda is defined;
// no resize is visible inside the timed loop.
// NOTE(review): the statements of the loop body that produce `result` (lines 64-65) are
// missing from this excerpt.
61 codepoints.resize (expected);
62 auto loop_inplace_utf8_to_unicode = [&] () {
63 for (
size_t j = 0; j < RUNS; j++)
66 TCMP (expected, ==, result);
// Time the lambda and print the rate computed from big.size() * RUNS, scaled down by M.
69 bench_time = timer.benchmark (loop_inplace_utf8_to_unicode);
70 Ase::printerr (
" BENCH utf8_to_unicode inplace: %11.1f MChar/s\n", big.
size() * RUNS / bench_time / M);
// Cross-check: decode `big` once more with GLib (length -1 = NUL-terminated input) and
// compare the result element by element with the contents of `codepoints`.
73 gunichar *gu = g_utf8_to_ucs4_fast (big.
c_str(), -1, &gresult);
// GLib must report exactly `expected` decoded characters.
74 TASSERT (expected == (
size_t) gresult);
75 for (
size_t i = 0; i < expected; i++)
// On a mismatch, print the 1-based position and both values; TCMP then checks the pair.
// NOTE(review): the braces around these statements are missing from this excerpt, so it
// is not visible whether TCMP runs for every element or only on a mismatch.
76 if (gu[i] != codepoints[i])
78 Ase::printerr (
" BENCH 0x%06x) 0x%06x != 0x%06x\n", i + 1, gu[i], codepoints[i]);
79 TCMP (gu[i], ==, codepoints[i]);
// Adds 1 to `length` for every byte that is not a UTF-8 continuation byte: continuation
// bytes have the bit pattern 10xxxxxx, i.e. (c & 0xC0) == 0x80, so the comparison yields
// 0 for them and 1 for every other byte.
// NOTE(review): presumably the loop body of not_0x80_strlen_utf8(); the surrounding lines
// are missing from this excerpt.
89 length += (c & 0xC0) != 0x80;
// Benchmark lambda: measures GLib's g_utf8_strlen() on `str` (max length -1 = input is
// NUL-terminated) and checks the returned character count against `expected`.
100 auto glib_utf8len_loop = [&] () {
101 for (
size_t j = 0; j < RUNS; j++)
103 size_t result = g_utf8_strlen (str.
c_str(), -1);
104 TCMP (expected, ==, result);
// Time the lambda and print the rate computed from str.size() * RUNS, scaled down by M,
// followed by the caller-supplied label `what`.
// NOTE(review): the label says MChar/s, but str.size() is a byte count if `str` is a
// std::string — confirm which unit is intended.
107 bench_time = timer.benchmark (glib_utf8len_loop);
108 Ase::printerr (
" BENCH g_utf8_strlen: %11.1f MChar/s %s\n", str.
size() * RUNS / bench_time / M, what);
// Benchmark lambda for the "Ase::utf8len(string&)" measurement; each run checks `result`
// against `expected`.
// NOTE(review): the call that produces `result` is on a line missing from this excerpt.
110 auto ase_utf8len_s_loop = [&] () {
111 for (
size_t j = 0; j < RUNS; j++)
114 TCMP (expected, ==, result);
// Time the lambda and print the rate computed from str.size() * RUNS, scaled down by M,
// followed by the caller-supplied label `what`.
117 bench_time = timer.benchmark (ase_utf8len_s_loop);
118 Ase::printerr (
" BENCH Ase::utf8len(string&): %11.1f MChar/s %s\n", str.
size() * RUNS / bench_time / M, what);
// Benchmark lambda for the "Ase::utf8len(char*)" measurement; each run checks `result`
// against `expected`.
// NOTE(review): the call that produces `result` is on a line missing from this excerpt.
120 auto ase_utf8len_c_loop = [&] () {
121 for (
size_t j = 0; j < RUNS; j++)
124 TCMP (expected, ==, result);
// Time the lambda and print the rate computed from str.size() * RUNS, scaled down by M,
// followed by the caller-supplied label `what`.
127 bench_time = timer.benchmark (ase_utf8len_c_loop);
128 Ase::printerr (
" BENCH Ase::utf8len(char*): %11.1f MChar/s %s\n", str.
size() * RUNS / bench_time / M, what);
// Benchmark lambda: measures the local not_0x80_strlen_utf8() helper on `str` and checks
// its result against `expected`.
130 auto simple_utf8len_loop = [&] () {
131 for (
size_t j = 0; j < RUNS; j++)
133 size_t result = not_0x80_strlen_utf8 (str);
134 TCMP (expected, ==, result);
// Time the lambda and print the rate computed from str.size() * RUNS, scaled down by M,
// followed by the caller-supplied label `what`.
137 bench_time = timer.benchmark (simple_utf8len_loop);
138 Ase::printerr (
" BENCH not_0x80_strlen_utf8: %11.1f MChar/s %s\n", str.
size() * RUNS / bench_time / M, what);
// Runs utf8_strlen_bench() on `big`, with the label "(high planes)" appended to each
// report line.
// NOTE(review): the construction of `big` is on lines missing from this excerpt.
143utf8_strlen_bench_high_planes()
146 utf8_strlen_bench (big,
"(high planes)");
// Runs utf8_strlen_bench() on a buffer that holds only the byte values 0x01 .. 0x7F, with
// the label "(ascii)" appended to each report line.
151utf8_strlen_bench_ascii()
// The buffer length is Ase::unicode_last_codepoint scaled by 1.07.
// NOTE(review): the reason for the factor 1.07 is not visible here — confirm.
154 big.
resize (Ase::unicode_last_codepoint * 1.07);
// i % 0x7F lies in 0 .. 0x7E, so after adding 1 every byte is in 0x01 .. 0x7F and never NUL.
155 for (
size_t i = 0; i < big.
size(); i++)
156 big[i] = 1 + i % 0x7F;
157 utf8_strlen_bench (big,
"(ascii)");
// 16 MiB; the allocator benchmark statically asserts that MAX_CHUNK_SIZE * N_ALLOCS does
// not exceed this size.
164#define TEST_AREA_SIZE (16 * 1024 * 1024)
// Called at the start of every allocator benchmark, before the timed loop.
169ensure_block_allocator_initialization()
// Perform one 1024 byte fast_mem_alloc()/fast_mem_free() round trip up front.
// NOTE(review): presumably this triggers one-time allocator setup — confirm.
171 fast_mem_free (fast_mem_alloc (1024));
// Require that fast_memory_arena has at least 4 MiB reserved.
172 const size_t areasize = 4 * 1024 * 1024;
173 TASSERT (fast_memory_arena.reserved() >= areasize);
// Hand the blocks b1 .. b4 back to the arena.
// NOTE(review): the allocations of b1 .. b4 are on lines missing from this excerpt.
178 fast_memory_arena.release (b1);
179 fast_memory_arena.release (b2);
180 fast_memory_arena.release (b3);
181 fast_memory_arena.release (b4);
// Tag type selecting which allocator a TestAllocator<> specialization wraps. The
// specializations in this file use FastMemoryArea, PosixMemalign, FastMemAlloc, LoftAlloc
// and LibcCalloc; the enumerator list itself is missing from this excerpt.
184enum class AllocatorType {
// Primary template, declared only; each AllocatorType value gets an explicit
// specialization providing name(), allocate_block() and release_block().
192template<AllocatorType C>
struct TestAllocator;
// Adapter that routes the benchmark's allocations through fast_memory_arena.
// NOTE(review): the signatures of the two functions below are on lines missing from this
// excerpt.
195struct TestAllocator<AllocatorType::FastMemoryArea> {
// Label used in the benchmark report line.
196 static std::string name () {
return "Ase::FastMemoryArea"; }
// Allocation: forwards `length` to fast_memory_arena.allocate() and returns its result.
198 {
return fast_memory_arena.allocate (length); }
// Release: hands `block` back to fast_memory_arena.
200 { fast_memory_arena.release (block); }
// Adapter that benchmarks posix_memalign() and free().
204struct TestAllocator<AllocatorType::PosixMemalign> {
// Label used in the benchmark report line.
205 static std::string name () {
return "posix_memalign"; }
// Allocates `length` bytes; the posix_memalign() call itself (including the alignment it
// requests) is on lines missing from this excerpt.
207 allocate_block (
uint32 length)
// posix_memalign() returns 0 on success.
211 TASSERT (posix_memalign_result == 0);
213 return { ptr, length };
// Release path: the whole block is overwritten with zeros before it is freed.
// NOTE(review): presumably this keeps the work comparable with allocators that clear
// memory — confirm.
218 memset (block.block_start, 0, block.block_length);
219 free (block.block_start);
// Adapter that benchmarks fast_mem_alloc() and fast_mem_free().
224struct TestAllocator<AllocatorType::FastMemAlloc> {
// Label used in the benchmark report line.
225 static std::string name () {
return "fast_mem_alloc"; }
// Allocates `length` bytes with fast_mem_alloc() and returns pointer and length as a block.
// NOTE(review): line 230 is missing from this excerpt and may validate `ptr`.
227 allocate_block (
uint32 length)
229 void *ptr = fast_mem_alloc (length);
231 return { ptr, length };
// Release path: only the block's start pointer is passed to fast_mem_free().
236 fast_mem_free (block.block_start);
// Adapter for the Loft allocator, reported as "loft_calloc (cacheline)".
241struct TestAllocator<AllocatorType::LoftAlloc> {
// Label used in the benchmark report line.
242 static std::string name () {
return "loft_calloc (cacheline)"; }
// NOTE(review): the body of allocate_block() is on lines missing from this excerpt.
244 allocate_block (
uint32 length)
// Release path: wraps the block's start and length in a LoftPtr<void>.
// NOTE(review): presumably the block is freed when `lptr` goes out of scope — confirm.
254 LoftPtr<void> lptr (block.block_start, { block.block_length });
// Adapter that benchmarks libc calloc() and free().
259struct TestAllocator<AllocatorType::LibcCalloc> {
// Label used in the benchmark report line.
260 static std::string name () {
return "::calloc (misaligned)"; }
// Allocates `length` zero-initialized bytes (calloc of `length` elements of size 1).
// NOTE(review): line 265 is missing from this excerpt and may validate `ptr`.
262 allocate_block (
uint32 length)
264 void *ptr =
calloc (length, 1);
266 return { ptr, length };
// Release path: plain free() of the block's start pointer.
271 free (block.block_start);
// State of a small 32-bit pseudo-random generator; the allocator benchmark assigns its
// own seed to this variable before every timed run.
279static uint32 quick_rand32_seed = 2147483563;
// One linear congruential step, seed = 1664525 * seed + 1013904223; the state is a uint32,
// so the arithmetic wraps modulo 2^32. The new state is also the returned random number.
// NOTE(review): the function header is missing from this excerpt; callers invoke it as
// quick_rand32().
283 quick_rand32_seed = 1664525 * quick_rand32_seed + 1013904223;
284 return quick_rand32_seed;
// Benchmark body shared by all allocator tests: runs a pseudo-random sequence of
// allocations and releases through TestAllocator<AT> and prints the time per allocation.
// `seed` is assigned to quick_rand32_seed at the start of every call of the timed lambda,
// so each call starts from the same generator state.
// NOTE(review): several lines of this function are missing from this excerpt (braces, the
// declarations of `blocks`, `accu`, `timer` and `rblock`, and the statements controlled by
// some of the `if`s); comments below are limited to what the visible lines show.
287template<AllocatorType AT>
static void
288ase_aligned_allocator_benchloop (
uint32 seed)
// ARUNS: repetitions of the whole pattern per timed call. MAX_CHUNK_SIZE: exclusive upper
// bound of a random block length. N_ALLOCS: number of block slots. RESIDENT: index
// threshold used by the `i > RESIDENT` test in the fill loop.
290 constexpr const size_t ARUNS = 3;
291 constexpr const int64 MAX_CHUNK_SIZE = 3 * 1024;
292 constexpr const int64 N_ALLOCS = 2048;
293 constexpr const int64 RESIDENT = N_ALLOCS / 3;
// Even with every slot at maximum size, the total must not exceed TEST_AREA_SIZE.
294 static_assert (MAX_CHUNK_SIZE * N_ALLOCS <= TEST_AREA_SIZE);
297 auto loop_aa = [&] () {
298 quick_rand32_seed = seed;
299 for (
size_t r = 0; r < ARUNS; r++)
// Phase 1: fill every slot with a block of random length.
302 for (
size_t i = 0; i < N_ALLOCS; i++)
304 const uint32 rnd = quick_rand32();
// Scale the 32-bit random value into 0 .. MAX_CHUNK_SIZE - 1 via a 64-bit product and a
// shift by 32, then enforce a minimum length of 8.
305 const size_t length =
MAX (8, (rnd * MAX_CHUNK_SIZE) >> 32);
306 blocks[i] = TestAllocator<AT>::allocate_block (length);
// Read the start of the new block as a double and add it to `accu`.
// NOTE(review): presumably this forces a real access to the allocated memory — confirm.
307 accu += *(
double*) blocks[i].block_start;
308 TASSERT (blocks[i].block_length > 0);
// For odd indices above RESIDENT a block `rblock` is released right away.
// NOTE(review): the selection of `rblock` is on lines missing from this excerpt.
309 if (i > RESIDENT && (i & 1))
313 TestAllocator<AT>::release_block (rblock);
// Phase 2: for N_ALLOCS / 2 iterations pick three slots, release them and allocate again
// with the three lengths rotated between the slots.
318 for (
size_t j = 0; j < N_ALLOCS / 2; j++)
// i1 walks the even indices, i2 is a random index in 0 .. N_ALLOCS - 1, i3 is the midpoint.
320 const uint i1 = j * 2;
321 const uint i2 = (quick_rand32() * N_ALLOCS) >> 32;
322 const uint i3 = (i1 + i2) / 2;
// NOTE(review): the statement controlled by this test is missing from this excerpt;
// presumably the iteration is skipped when two of the indices coincide.
323 if (i1 == i2 || i2 == i3 || i3 == i1)
325 const uint l1 = blocks[i1].block_length;
326 const uint l2 = blocks[i2].block_length;
327 const uint l3 = blocks[i3].block_length;
// NOTE(review): lines 328, 330 and 332 are missing from this excerpt and may guard the
// three releases below.
329 TestAllocator<AT>::release_block (blocks[i1]);
331 TestAllocator<AT>::release_block (blocks[i2]);
333 TestAllocator<AT>::release_block (blocks[i3]);
// Slot i2 gets length l1, slot i1 gets l3 and slot i3 gets l2; a length of 0 is replaced
// by MAX_CHUNK_SIZE / 3.
334 blocks[i2] = TestAllocator<AT>::allocate_block (l1 ? l1 : MAX_CHUNK_SIZE / 3);
335 blocks[i1] = TestAllocator<AT>::allocate_block (l3 ? l3 : MAX_CHUNK_SIZE / 3);
336 blocks[i3] = TestAllocator<AT>::allocate_block (l2 ? l2 : MAX_CHUNK_SIZE / 3);
337 accu += *(
double*) blocks[i2].block_start;
338 accu += *(
double*) blocks[i1].block_start;
339 accu += *(
double*) blocks[i3].block_start;
// Phase 3: N_ALLOCS releases at random slot indices.
// NOTE(review): the statement controlled by the zero-length test is missing from this
// excerpt; presumably slots with a block_length of 0 are skipped.
342 for (
size_t j = 0; j < N_ALLOCS; j++)
344 const uint i = (quick_rand32() * N_ALLOCS) >> 32;
345 if (!blocks[i].block_length)
347 TestAllocator<AT>::release_block (blocks[i]);
// Phase 4: release every slot that still has a non-zero block_length.
351 for (
size_t i = 0; i < N_ALLOCS; i++)
352 if (blocks[i].block_length)
354 TestAllocator<AT>::release_block (blocks[i]);
// Time the lambda and report. n_allocations is the nominal count per timed call: one
// allocation per slot in phase 1 plus three per iteration of the N_ALLOCS / 2 loop in
// phase 2, times ARUNS.
360 const double bench_aa = timer.benchmark (loop_aa);
361 const size_t n_allocations = ARUNS * N_ALLOCS * (1 + 3.0 / 2);
// bench_aa is scaled by 1000 for the "msecs" figure and by 10^9 for nsecs per allocation.
362 const double ns_p_a = 1000000000.0 * bench_aa / n_allocations;
363 Ase::printerr (
" BENCH %-25s %u allocations in %.1f msecs, %.1fnsecs/allocation\n",
364 TestAllocator<AT>::name() +
":", n_allocations, 1000 * bench_aa, ns_p_a);
// Benchmark entry point for the AllocatorType::FastMemoryArea adapter.
370zbench_aligned_allocator_aligned_block()
// Run the allocator warm-up first, then the shared benchmark loop with the seed
// 2654435769 (0x9E3779B9), the same seed as the other zbench_aligned_allocator_* tests.
372 ensure_block_allocator_initialization();
373 ase_aligned_allocator_benchloop<AllocatorType::FastMemoryArea> (2654435769);
// Benchmark entry point for the AllocatorType::PosixMemalign adapter.
378zbench_aligned_allocator_memalign()
// Run the allocator warm-up first, then the shared benchmark loop with the seed
// 2654435769 (0x9E3779B9), the same seed as the other zbench_aligned_allocator_* tests.
380 ensure_block_allocator_initialization();
381 ase_aligned_allocator_benchloop<AllocatorType::PosixMemalign> (2654435769);
// Benchmark entry point for the AllocatorType::LibcCalloc adapter.
389zbench_aligned_allocator_calloc()
// Run the allocator warm-up first, then the shared benchmark loop with the seed
// 2654435769 (0x9E3779B9), the same seed as the other zbench_aligned_allocator_* tests.
391 ensure_block_allocator_initialization();
392 ase_aligned_allocator_benchloop<AllocatorType::LibcCalloc> (2654435769);
// Benchmark entry point for the AllocatorType::FastMemAlloc adapter.
397zbench_aligned_allocator_fast_mem_alloc()
// Run the allocator warm-up first, then the shared benchmark loop with the seed
// 2654435769 (0x9E3779B9), the same seed as the other zbench_aligned_allocator_* tests.
399 ensure_block_allocator_initialization();
400 ase_aligned_allocator_benchloop<AllocatorType::FastMemAlloc> (2654435769);
// Benchmark entry point for the AllocatorType::LoftAlloc adapter.
405zbench_aligned_allocator_loft_alloc()
// Run the allocator warm-up first, then the shared benchmark loop with the seed
// 2654435769 (0x9E3779B9), the same seed as the other zbench_aligned_allocator_* tests.
407 ensure_block_allocator_initialization();
408 ase_aligned_allocator_benchloop<AllocatorType::LoftAlloc> (2654435769);
#define TEST_BENCHMARK(FUNC)
Register func as a benchmark test.
#define MAX(a, b)
Yield maximum of a and b.
constexpr size_t cache_line_size
Minimum alignment >= cache line size, see getconf LEVEL1_DCACHE_LINESIZE.
The Anklang C++ API namespace.
std::string utf8encode(const uint32_t *codepoints, size_t n_codepoints)
Convert codepoints into a UTF-8 string, using the shortest possible encoding.
size_t utf8len(const char *str)
Count valid UTF-8 sequences, invalid sequences are counted as Latin-1 characters.
int64_t int64
A 64-bit signed integer, use PRI*64 in format strings.
size_t utf8_to_unicode(const char *str, uint32_t *codepoints)
constexpr bool unicode_is_assigned(uint32_t u)
Return whether u matches any of the assigned Unicode planes.
uint32_t uint32
A 32-bit unsigned integer.
uint32_t uint
Provide 'uint' as convenience type.
Memory area (over-)aligned to cache size and utilizing huge pages.
Reference for an allocated memory block.
#define TASSERT(cond)
Unconditional test assertion, enters breakpoint if not fulfilled.
#define TCMP(a, cmp, b)
Compare a and b according to operator cmp, verbose on failure.