Skip to content

[mono] dn_simdhash_ght_compatible optimizations for ARM64 #113241

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions src/mono/mono/metadata/bundled-resources.c
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,7 @@ bundled_resources_get (const char *id)
char key_buffer[1024];
key_from_id(id, key_buffer, sizeof(key_buffer));

MonoBundledResource *result = NULL;
dn_simdhash_ght_try_get_value (bundled_resources, key_buffer, (void **)&result);
return result;
return (MonoBundledResource *)dn_simdhash_ght_get_value_or_default (bundled_resources, key_buffer);
}

//---------------------------------------------------------------------------------------
Expand Down
5 changes: 2 additions & 3 deletions src/mono/mono/metadata/class.c
Original file line number Diff line number Diff line change
Expand Up @@ -1252,7 +1252,7 @@ mono_class_inflate_generic_method_full_checked (MonoMethod *method, MonoClass *k
mono_mem_manager_lock (mm);
if (!mm->gmethod_cache)
mm->gmethod_cache = dn_simdhash_ght_new_full (inflated_method_hash, inflated_method_equal, NULL, (GDestroyNotify)free_inflated_method, 0, NULL);
dn_simdhash_ght_try_get_value (mm->gmethod_cache, iresult, (void **)&cached);
cached = dn_simdhash_ght_get_value_or_default (mm->gmethod_cache, iresult);
mono_mem_manager_unlock (mm);

if (cached) {
Expand Down Expand Up @@ -1358,8 +1358,7 @@ mono_class_inflate_generic_method_full_checked (MonoMethod *method, MonoClass *k

// check cache
mono_mem_manager_lock (mm);
cached = NULL;
dn_simdhash_ght_try_get_value (mm->gmethod_cache, iresult, (void **)&cached);
cached = dn_simdhash_ght_get_value_or_default (mm->gmethod_cache, iresult);
if (!cached) {
dn_simdhash_ght_insert (mm->gmethod_cache, iresult, iresult);
iresult->owner = mm;
Expand Down
5 changes: 2 additions & 3 deletions src/mono/mono/metadata/metadata.c
Original file line number Diff line number Diff line change
Expand Up @@ -3355,7 +3355,7 @@ mono_metadata_get_inflated_signature (MonoMethodSignature *sig, MonoGenericConte
mm->gsignature_cache = dn_simdhash_ght_new_full (inflated_signature_hash, inflated_signature_equal, NULL, (GDestroyNotify)free_inflated_signature, 256, NULL);

// FIXME: The lookup is done on the newly allocated sig so it always fails
dn_simdhash_ght_try_get_value (mm->gsignature_cache, &helper, (gpointer *)&res);
res = dn_simdhash_ght_get_value_or_default (mm->gsignature_cache, &helper);
if (!res) {
res = mono_mem_manager_alloc0 (mm, sizeof (MonoInflatedMethodSignature));
// FIXME: sig is an inflated signature not owned by the mem manager
Expand Down Expand Up @@ -3498,8 +3498,7 @@ mono_metadata_get_canonical_generic_inst (MonoGenericInst *candidate)
if (!mm->ginst_cache)
mm->ginst_cache = dn_simdhash_ght_new_full (mono_metadata_generic_inst_hash, mono_metadata_generic_inst_equal, NULL, (GDestroyNotify)free_generic_inst, 0, NULL);

MonoGenericInst *ginst = NULL;
dn_simdhash_ght_try_get_value (mm->ginst_cache, candidate, (void **)&ginst);
MonoGenericInst *ginst = dn_simdhash_ght_get_value_or_default (mm->ginst_cache, candidate);
if (!ginst) {
int size = MONO_SIZEOF_GENERIC_INST + type_argc * sizeof (MonoType *);
ginst = (MonoGenericInst *)mono_mem_manager_alloc0 (mm, size);
Expand Down
5 changes: 2 additions & 3 deletions src/mono/mono/mini/interp/transform.c
Original file line number Diff line number Diff line change
Expand Up @@ -2735,7 +2735,7 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
default: return FALSE;
}
}
}
}

return FALSE;
}
Expand Down Expand Up @@ -10035,8 +10035,7 @@ mono_interp_transform_method (InterpMethod *imethod, ThreadContext *context, Mon

// FIXME Publishing of seq points seems to be racy with tiereing. We can have both tiered and untiered method
// running at the same time. We could therefore get the optimized imethod seq points for the unoptimized method.
gpointer seq_points = NULL;
dn_simdhash_ght_try_get_value (jit_mm->seq_points, imethod->method, (void **)&seq_points);
gpointer seq_points = dn_simdhash_ght_get_value_or_default (jit_mm->seq_points, imethod->method);
if (!seq_points || seq_points != imethod->jinfo->seq_points)
dn_simdhash_ght_replace (jit_mm->seq_points, imethod->method, imethod->jinfo->seq_points);
}
Expand Down
6 changes: 3 additions & 3 deletions src/mono/mono/mini/seq-points.c
Original file line number Diff line number Diff line change
Expand Up @@ -272,12 +272,12 @@ mono_get_seq_points (MonoMethod *method)
// FIXME:
jit_mm = get_default_jit_mm ();
jit_mm_lock (jit_mm);
dn_simdhash_ght_try_get_value (jit_mm->seq_points, method, (void **)&seq_points);
seq_points = dn_simdhash_ght_get_value_or_default (jit_mm->seq_points, method);
if (!seq_points && method->is_inflated) {
/* generic sharing + aot */
dn_simdhash_ght_try_get_value (jit_mm->seq_points, declaring_generic_method, (void **)&seq_points);
seq_points = dn_simdhash_ght_get_value_or_default (jit_mm->seq_points, declaring_generic_method);
if (!seq_points)
dn_simdhash_ght_try_get_value (jit_mm->seq_points, shared_method, (void **)&seq_points);
seq_points = dn_simdhash_ght_get_value_or_default (jit_mm->seq_points, shared_method);
}
jit_mm_unlock (jit_mm);

Expand Down
67 changes: 44 additions & 23 deletions src/native/containers/dn-simdhash-ght-compatible.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,6 @@ typedef struct dn_simdhash_ght_data {
dn_simdhash_ght_destroy_func value_destroy_func;
} dn_simdhash_ght_data;

static inline uint32_t
dn_simdhash_ght_hash (dn_simdhash_ght_data data, void * key)
{
dn_simdhash_ght_hash_func hash_func = data.hash_func;
if (hash_func)
return (uint32_t)hash_func(key);
else
// FIXME: Seed
return MurmurHash3_32_ptr(key, 0);
}

static inline int32_t
dn_simdhash_ght_equals (dn_simdhash_ght_data data, void * lhs, void * rhs)
{
dn_simdhash_ght_equal_func equal_func = data.key_equal_func;
if (equal_func)
return equal_func(lhs, rhs);
else
return lhs == rhs;
}

static inline void
dn_simdhash_ght_removed (dn_simdhash_ght_data data, void * key, void * value)
{
Expand Down Expand Up @@ -68,8 +47,13 @@ dn_simdhash_ght_replaced (dn_simdhash_ght_data data, void * old_key, void * new_
#define DN_SIMDHASH_KEY_T void *
#define DN_SIMDHASH_VALUE_T void *
#define DN_SIMDHASH_INSTANCE_DATA_T dn_simdhash_ght_data
#define DN_SIMDHASH_KEY_HASHER dn_simdhash_ght_hash
#define DN_SIMDHASH_KEY_EQUALS dn_simdhash_ght_equals

#define DN_SIMDHASH_SCAN_DATA_T dn_simdhash_ght_equal_func
#define DN_SIMDHASH_GET_SCAN_DATA(data) data.key_equal_func

#define DN_SIMDHASH_KEY_HASHER(data, key) (uint32_t)data.hash_func(key)
#define DN_SIMDHASH_KEY_EQUALS(scan_data, lhs, rhs) scan_data(lhs, rhs)

#define DN_SIMDHASH_ON_REMOVE dn_simdhash_ght_removed
#define DN_SIMDHASH_ON_REPLACE dn_simdhash_ght_replaced
#if SIZEOF_VOID_P == 8
Expand All @@ -81,13 +65,32 @@ dn_simdhash_ght_replaced (dn_simdhash_ght_data data, void * old_key, void * new_

#include "dn-simdhash-specialization.h"

static unsigned int
dn_simdhash_ght_default_hash (const void * key)
{
return (unsigned int)(size_t)key;
}

static int32_t
dn_simdhash_ght_default_comparer (const void * a, const void * b)
{
return a == b;
}

dn_simdhash_ght_t *
dn_simdhash_ght_new (
dn_simdhash_ght_hash_func hash_func, dn_simdhash_ght_equal_func key_equal_func,
uint32_t capacity, dn_allocator_t *allocator
)
{
dn_simdhash_ght_t *hash = dn_simdhash_new_internal(&DN_SIMDHASH_T_META, DN_SIMDHASH_T_VTABLE, capacity, allocator);
// Most users of dn_simdhash_ght are passing a custom comparer, and always doing an indirect call ends up being faster
// than conditionally doing a fast inline check when there's no comparer set. Somewhat counter-intuitive, but true
// on both x64 and arm64. Probably due to the smaller code size and reduced branch predictor pressure.
if (!hash_func)
hash_func = dn_simdhash_ght_default_hash;
if (!key_equal_func)
key_equal_func = dn_simdhash_ght_default_comparer;
dn_simdhash_instance_data(dn_simdhash_ght_data, hash).hash_func = hash_func;
dn_simdhash_instance_data(dn_simdhash_ght_data, hash).key_equal_func = key_equal_func;
return hash;
Expand All @@ -101,6 +104,10 @@ dn_simdhash_ght_new_full (
)
{
dn_simdhash_ght_t *hash = dn_simdhash_new_internal(&DN_SIMDHASH_T_META, DN_SIMDHASH_T_VTABLE, capacity, allocator);
if (!hash_func)
hash_func = dn_simdhash_ght_default_hash;
if (!key_equal_func)
key_equal_func = dn_simdhash_ght_default_comparer;
dn_simdhash_instance_data(dn_simdhash_ght_data, hash).hash_func = hash_func;
dn_simdhash_instance_data(dn_simdhash_ght_data, hash).key_equal_func = key_equal_func;
dn_simdhash_instance_data(dn_simdhash_ght_data, hash).key_destroy_func = key_destroy_func;
Expand Down Expand Up @@ -145,3 +152,17 @@ dn_simdhash_ght_insert_replace (
return;
}
}

void *
dn_simdhash_ght_get_value_or_default (
dn_simdhash_ght_t *hash, void * key
)
{
check_self(hash);
uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(DN_SIMDHASH_GET_DATA(hash), key);
DN_SIMDHASH_VALUE_T *value_ptr = DN_SIMDHASH_FIND_VALUE_INTERNAL(hash, key, key_hash);
if (value_ptr)
return *value_ptr;
else
return NULL;
}
6 changes: 6 additions & 0 deletions src/native/containers/dn-simdhash-ght-compatible.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ dn_simdhash_ght_insert_replace (
int32_t overwrite_key
);

// faster and simpler to use than dn_simdhash_ght_try_get_value, use it wisely
void *
dn_simdhash_ght_get_value_or_default (
dn_simdhash_ght_t *hash, void * key
);

// compatibility shims for the g_hash_table_ versions in glib.h
#define dn_simdhash_ght_insert(h,k,v) dn_simdhash_ght_insert_replace ((h),(k),(v),FALSE)
#define dn_simdhash_ght_replace(h,k,v) dn_simdhash_ght_insert_replace ((h),(k),(v),TRUE)
Expand Down
4 changes: 2 additions & 2 deletions src/native/containers/dn-simdhash-ptr-ptr.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
#define DN_SIMDHASH_T dn_simdhash_ptr_ptr
#define DN_SIMDHASH_KEY_T void *
#define DN_SIMDHASH_VALUE_T void *
#define DN_SIMDHASH_KEY_HASHER(hash, key) (MurmurHash3_32_ptr(key, 0))
#define DN_SIMDHASH_KEY_EQUALS(hash, lhs, rhs) (lhs == rhs)
#define DN_SIMDHASH_KEY_HASHER(data, key) (MurmurHash3_32_ptr(key, 0))
#define DN_SIMDHASH_KEY_EQUALS(data, lhs, rhs) (lhs == rhs)
#if SIZEOF_VOID_P == 8
#define DN_SIMDHASH_BUCKET_CAPACITY 11
#else
Expand Down
4 changes: 2 additions & 2 deletions src/native/containers/dn-simdhash-ptrpair-ptr.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ dn_ptrpair_t_equals (dn_ptrpair_t lhs, dn_ptrpair_t rhs)
#define DN_SIMDHASH_T dn_simdhash_ptrpair_ptr
#define DN_SIMDHASH_KEY_T dn_ptrpair_t
#define DN_SIMDHASH_VALUE_T void *
#define DN_SIMDHASH_KEY_HASHER(hash, key) dn_ptrpair_t_hash(key)
#define DN_SIMDHASH_KEY_EQUALS(hash, lhs, rhs) dn_ptrpair_t_equals(lhs, rhs)
#define DN_SIMDHASH_KEY_HASHER(data, key) dn_ptrpair_t_hash(key)
#define DN_SIMDHASH_KEY_EQUALS(data, lhs, rhs) dn_ptrpair_t_equals(lhs, rhs)
#if SIZEOF_VOID_P == 8
// 192 bytes holds 12 16-byte blocks, so 11 keys and one suffix table
#define DN_SIMDHASH_BUCKET_CAPACITY 11
Expand Down
15 changes: 12 additions & 3 deletions src/native/containers/dn-simdhash-specialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@
#error Expected DN_SIMDHASH_VALUE_T definition i.e. int
#endif

// If specified, this data will be precalculated/prefetched at the start of scans
// and passed to your KEY_EQUALS macro as its first parameter
#ifndef DN_SIMDHASH_SCAN_DATA_T
#define DN_SIMDHASH_SCAN_DATA_T uint8_t
#define DN_SIMDHASH_GET_SCAN_DATA(data) 0
#endif

// If specified, we pass instance data to the handlers by-value, otherwise we
// pass the pointer to the hash itself by-value. This is enough to allow clang
// to hoist the load of the instance data out of the key scan loop, though it
Expand Down Expand Up @@ -204,11 +211,13 @@ DN_SIMDHASH_SCAN_BUCKET_INTERNAL (DN_SIMDHASH_T_PTR hash, bucket_t *restrict buc
#undef bucket_suffixes

if (DN_LIKELY(index < count)) {
DN_SIMDHASH_SCAN_DATA_T scan_data = DN_SIMDHASH_GET_SCAN_DATA(DN_SIMDHASH_GET_DATA(hash));
// HACK: Suppress unused variable warning for specializations that don't use scan_data
(void)(scan_data);

DN_SIMDHASH_KEY_T *key = &bucket->keys[index];
do {
// FIXME: Could be profitable to manually hoist the data load outside of the loop,
// if not out of SCAN_BUCKET_INTERNAL entirely. Clang appears to do LICM on it.
if (DN_SIMDHASH_KEY_EQUALS(DN_SIMDHASH_GET_DATA(hash), needle, *key))
if (DN_SIMDHASH_KEY_EQUALS(scan_data, needle, *key))
return index;
key++;
index++;
Expand Down
4 changes: 2 additions & 2 deletions src/native/containers/dn-simdhash-string-ptr.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ dn_simdhash_str_hash (dn_simdhash_str_key v1)
#define DN_SIMDHASH_T dn_simdhash_string_ptr
#define DN_SIMDHASH_KEY_T dn_simdhash_str_key
#define DN_SIMDHASH_VALUE_T void *
#define DN_SIMDHASH_KEY_HASHER(hash, key) dn_simdhash_str_hash(key)
#define DN_SIMDHASH_KEY_EQUALS(hash, lhs, rhs) dn_simdhash_str_equal(lhs, rhs)
#define DN_SIMDHASH_KEY_HASHER(data, key) dn_simdhash_str_hash(key)
#define DN_SIMDHASH_KEY_EQUALS(data, lhs, rhs) dn_simdhash_str_equal(lhs, rhs)
#define DN_SIMDHASH_ACCESSOR_SUFFIX _raw

// perfect cache alignment. 32-bit ptrs: 8-byte keys. 64-bit: 16-byte keys.
Expand Down
4 changes: 2 additions & 2 deletions src/native/containers/dn-simdhash-u32-ptr.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#define DN_SIMDHASH_T dn_simdhash_u32_ptr
#define DN_SIMDHASH_KEY_T uint32_t
#define DN_SIMDHASH_VALUE_T void *
#define DN_SIMDHASH_KEY_HASHER(hash, key) murmur3_fmix32(key)
#define DN_SIMDHASH_KEY_EQUALS(hash, lhs, rhs) (lhs == rhs)
#define DN_SIMDHASH_KEY_HASHER(data, key) murmur3_fmix32(key)
#define DN_SIMDHASH_KEY_EQUALS(data, lhs, rhs) (lhs == rhs)

#include "dn-simdhash-specialization.h"
8 changes: 3 additions & 5 deletions src/native/containers/simdhash-benchmark/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ benchmark_deps := $(wildcard ./*.c) $(wildcard ./*.h)
# I don't know why this is necessary
nodejs_path := $(shell which node)

benchmark_sources := ../dn-simdhash.c ../dn-vector.c ./benchmark.c ../dn-simdhash-u32-ptr.c ../dn-simdhash-string-ptr.c ./ghashtable.c ./all-measurements.c
benchmark_sources := ../dn-simdhash.c ../dn-vector.c ./benchmark.c ../dn-simdhash-u32-ptr.c ../dn-simdhash-string-ptr.c ../dn-simdhash-ght-compatible.c ./ghashtable.c ./all-measurements.c
common_options := -g -O3 -DNO_CONFIG_H -lm -DNDEBUG
ifeq ($(SIMD), 0)
wasm_options := -mbulk-memory
Expand All @@ -15,12 +15,10 @@ endif

benchmark-native: $(dn_deps) $(benchmark_deps)
clang $(benchmark_sources) $(common_options) -DSIZEOF_VOID_P=8
objdump -S -d --no-show-raw-insn ./a.out > ./a.dis

benchmark-wasm: $(dn_deps) $(benchmark_deps)
~/Projects/emscripten/emcc $(benchmark_sources) $(common_options) $(wasm_options) -DSIZEOF_VOID_P=4

disassemble-benchmark: benchmark-native benchmark-wasm
objdump -d ./a.out > ./a.dis
~/wabt/bin/wasm2wat ./a.out.wasm > ./a.wat

run-native: benchmark-native
Expand All @@ -29,5 +27,5 @@ run-native: benchmark-native
run-wasm: benchmark-wasm
$(nodejs_path) ./a.out.js $(FILTER)

default_target: disassemble-benchmark
default_target: benchmark-native

38 changes: 38 additions & 0 deletions src/native/containers/simdhash-benchmark/all-measurements.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,30 @@ static void destroy_instance_ght (void *data) {
g_hash_table_destroy((GHashTable *)data);
}


static void * create_instance_dnght () {
if (!random_u32s)
init_data();

return dn_simdhash_ght_new(NULL, NULL, 0, NULL);
}

static void * create_instance_dnght_random_values () {
if (!random_u32s)
init_data();

dn_simdhash_ght_t *result = dn_simdhash_ght_new(NULL, NULL, INNER_COUNT, NULL);
for (int i = 0; i < INNER_COUNT; i++) {
uint32_t key = *dn_vector_index_t(random_u32s, uint32_t, i);
dn_simdhash_ght_try_add(result, (gpointer)(size_t)key, (gpointer)(size_t)i);
}
return result;
}

static void destroy_instance_dnght (void *data) {
dn_simdhash_free((dn_simdhash_ght_t *)data);
}

#endif // MEASUREMENTS_IMPLEMENTATION

// These go outside the guard because we include this file multiple times.
Expand Down Expand Up @@ -217,3 +241,17 @@ MEASUREMENT(ght_find_missing_key, GHashTable *, create_instance_ght_random_value
dn_simdhash_assert(g_hash_table_lookup(data, (gpointer)(size_t)key) == NULL);
}
})

MEASUREMENT(dnght_find_random_keys, dn_simdhash_ght_t *, create_instance_dnght_random_values, destroy_instance_dnght, {
for (int i = 0; i < INNER_COUNT; i++) {
uint32_t key = *dn_vector_index_t(random_u32s, uint32_t, i);
dn_simdhash_assert(dn_simdhash_ght_get_value_or_default(data, (gpointer)(size_t)key) == (gpointer)(size_t)i);
}
})

MEASUREMENT(dnght_find_missing_key, dn_simdhash_ght_t *, create_instance_dnght_random_values, destroy_instance_dnght, {
for (int i = 0; i < INNER_COUNT; i++) {
uint32_t key = *dn_vector_index_t(random_unused_u32s, uint32_t, i);
dn_simdhash_assert(!dn_simdhash_ght_get_value_or_default(data, (gpointer)(size_t)key));
}
})
Loading