From 64a76c9ce1709beb20ba4638073979fe9eb43663 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Fri, 26 Jan 2024 11:06:41 -0800 Subject: [PATCH 1/2] Refactor dict lookup functions to use force inline helpers --- Objects/dictobject.c | 187 ++++++++++++++++++++++--------------------- 1 file changed, 94 insertions(+), 93 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index c5477ab15f8dc9..e202d886ddeed2 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -874,11 +874,14 @@ lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index) Py_UNREACHABLE(); } -// Search non-Unicode key from Unicode table -static Py_ssize_t -unicodekeys_lookup_generic(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) +typedef void *(*generic_entries_func)(PyDictKeysObject *dk); + +static inline Py_ALWAYS_INLINE Py_ssize_t +do_lookup(PyDictObject *mp, PyDictKeysObject *dk, PyObject *key, Py_hash_t hash, + generic_entries_func entries, + Py_ssize_t (*check_lookup)(PyDictObject *, PyDictKeysObject *, void *, Py_ssize_t ix, PyObject *key, Py_hash_t)) { - PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(dk); + void *ep0 = entries(dk); size_t mask = DK_MASK(dk); size_t perturb = hash; size_t i = (size_t)hash & mask; @@ -886,57 +889,22 @@ unicodekeys_lookup_generic(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key for (;;) { ix = dictkeys_get_index(dk, i); if (ix >= 0) { - PyDictUnicodeEntry *ep = &ep0[ix]; - assert(ep->me_key != NULL); - assert(PyUnicode_CheckExact(ep->me_key)); - if (ep->me_key == key) { + ix = check_lookup(mp, dk, ep0, ix, key, hash); + if (ix != DKIX_DUMMY) { return ix; } - if (unicode_get_hash(ep->me_key) == hash) { - PyObject *startkey = ep->me_key; - Py_INCREF(startkey); - int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); - Py_DECREF(startkey); - if (cmp < 0) { - return DKIX_ERROR; - } - if (dk == mp->ma_keys && ep->me_key == startkey) { - if (cmp > 0) { - return ix; - } - } - else { - /* The dict was mutated, restart */ - return DKIX_KEY_CHANGED; - } - } } else if (ix == DKIX_EMPTY) { return DKIX_EMPTY; } perturb >>= PERTURB_SHIFT; i = mask & (i*5 + perturb + 1); - } - Py_UNREACHABLE(); -} -// Search Unicode key from Unicode table. -static Py_ssize_t _Py_HOT_FUNCTION -unicodekeys_lookup_unicode(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) -{ - PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(dk); - size_t mask = DK_MASK(dk); - size_t perturb = hash; - size_t i = (size_t)hash & mask; - Py_ssize_t ix; - for (;;) { + // Manual loop unrolling ix = dictkeys_get_index(dk, i); if (ix >= 0) { - PyDictUnicodeEntry *ep = &ep0[ix]; - assert(ep->me_key != NULL); - assert(PyUnicode_CheckExact(ep->me_key)); - if (ep->me_key == key || - (unicode_get_hash(ep->me_key) == hash && unicode_eq(ep->me_key, key))) { + ix = check_lookup(mp, dk, ep0, ix, key, hash); + if (ix != DKIX_DUMMY) { return ix; } } @@ -945,69 +913,102 @@ unicodekeys_lookup_unicode(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) } perturb >>= PERTURB_SHIFT; i = mask & (i*5 + perturb + 1); - // Manual loop unrolling - ix = dictkeys_get_index(dk, i); - if (ix >= 0) { - PyDictUnicodeEntry *ep = &ep0[ix]; - assert(ep->me_key != NULL); - assert(PyUnicode_CheckExact(ep->me_key)); - if (ep->me_key == key || - (unicode_get_hash(ep->me_key) == hash && unicode_eq(ep->me_key, key))) { + } + Py_UNREACHABLE(); +} + +static inline Py_ALWAYS_INLINE +Py_ssize_t compare_unicode_generic(PyDictObject *mp, PyDictKeysObject *dk, + void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) +{ + PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix]; + assert(ep->me_key != NULL); + assert(PyUnicode_CheckExact(ep->me_key)); + assert(!PyUnicode_CheckExact(key)); + // TODO: Thread safety + + if (unicode_get_hash(ep->me_key) == hash) { + PyObject *startkey = ep->me_key; + Py_INCREF(startkey); + int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp < 0) { + return DKIX_ERROR; + } + if (dk == mp->ma_keys && ep->me_key == startkey) { + if (cmp > 0) { return ix; } } - else if (ix == DKIX_EMPTY) { - return DKIX_EMPTY; + else { + /* The dict was mutated, restart */ + return DKIX_KEY_CHANGED; } - perturb >>= PERTURB_SHIFT; - i = mask & (i*5 + perturb + 1); } - Py_UNREACHABLE(); + return DKIX_DUMMY; } -// Search key from Generic table. +// Search non-Unicode key from Unicode table static Py_ssize_t -dictkeys_generic_lookup(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) +unicodekeys_lookup_generic(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) { - PyDictKeyEntry *ep0 = DK_ENTRIES(dk); - size_t mask = DK_MASK(dk); - size_t perturb = hash; - size_t i = (size_t)hash & mask; - Py_ssize_t ix; - for (;;) { - ix = dictkeys_get_index(dk, i); - if (ix >= 0) { - PyDictKeyEntry *ep = &ep0[ix]; - assert(ep->me_key != NULL); - if (ep->me_key == key) { + return do_lookup(mp, dk, key, hash, (generic_entries_func)DK_UNICODE_ENTRIES, compare_unicode_generic); +} + +static inline Py_ALWAYS_INLINE +Py_ssize_t compare_unicode_unicode(PyDictObject *mp, PyDictKeysObject *dk, + void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) +{ + PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix]; + assert(ep->me_key != NULL); + assert(PyUnicode_CheckExact(ep->me_key)); + if (ep->me_key == key || + (unicode_get_hash(ep->me_key) == hash && unicode_eq(ep->me_key, key))) { + return ix; + } + return DKIX_DUMMY; +} + +static Py_ssize_t _Py_HOT_FUNCTION +unicodekeys_lookup_unicode(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) +{ + return do_lookup(NULL, dk, key, hash, (generic_entries_func)DK_UNICODE_ENTRIES, compare_unicode_unicode); +} + +static inline Py_ALWAYS_INLINE +Py_ssize_t compare_generic(PyDictObject *mp, PyDictKeysObject *dk, + void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) +{ + PyDictKeyEntry *ep = &((PyDictKeyEntry *)ep0)[ix]; + assert(ep->me_key != NULL); + if (ep->me_key == key) { + return ix; + } + if (ep->me_hash == hash) { + PyObject *startkey = ep->me_key; + Py_INCREF(startkey); + int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp < 0) { + return DKIX_ERROR; + } + if (dk == mp->ma_keys && ep->me_key == startkey) { + if (cmp > 0) { return ix; } - if (ep->me_hash == hash) { - PyObject *startkey = ep->me_key; - Py_INCREF(startkey); - int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); - Py_DECREF(startkey); - if (cmp < 0) { - return DKIX_ERROR; - } - if (dk == mp->ma_keys && ep->me_key == startkey) { - if (cmp > 0) { - return ix; - } - } - else { - /* The dict was mutated, restart */ - return DKIX_KEY_CHANGED; - } - } } - else if (ix == DKIX_EMPTY) { - return DKIX_EMPTY; + else { + /* The dict was mutated, restart */ + return DKIX_KEY_CHANGED; } - perturb >>= PERTURB_SHIFT; - i = mask & (i*5 + perturb + 1); } - Py_UNREACHABLE(); + return DKIX_DUMMY; +} + +static Py_ssize_t +dictkeys_generic_lookup(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) +{ + return do_lookup(mp, dk, key, hash, (generic_entries_func)DK_ENTRIES, compare_generic); } /* Lookup a string in a (all unicode) dict keys. From bd4fba91820cdae6b0360599056c0b40391125c8 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Mon, 29 Jan 2024 15:50:06 -0800 Subject: [PATCH 2/2] Use 1/0 for successful comparison eq/ne --- Objects/dictobject.c | 59 +++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index e202d886ddeed2..23d7e9b5e38a35 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -874,14 +874,11 @@ lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index) Py_UNREACHABLE(); } -typedef void *(*generic_entries_func)(PyDictKeysObject *dk); - static inline Py_ALWAYS_INLINE Py_ssize_t do_lookup(PyDictObject *mp, PyDictKeysObject *dk, PyObject *key, Py_hash_t hash, - generic_entries_func entries, Py_ssize_t (*check_lookup)(PyDictObject *, PyDictKeysObject *, void *, Py_ssize_t ix, PyObject *key, Py_hash_t)) { - void *ep0 = entries(dk); + void *ep0 = _DK_ENTRIES(dk); size_t mask = DK_MASK(dk); size_t perturb = hash; size_t i = (size_t)hash & mask; @@ -889,8 +886,10 @@ do_lookup(PyDictObject *mp, PyDictKeysObject *dk, PyObject *key, Py_hash_t hash, for (;;) { ix = dictkeys_get_index(dk, i); if (ix >= 0) { - ix = check_lookup(mp, dk, ep0, ix, key, hash); - if (ix != DKIX_DUMMY) { + Py_ssize_t cmp = check_lookup(mp, dk, ep0, ix, key, hash); + if (cmp < 0) { + return cmp; + } else if (cmp) { return ix; } } @@ -903,8 +902,10 @@ do_lookup(PyDictObject *mp, PyDictKeysObject *dk, PyObject *key, Py_hash_t hash, // Manual loop unrolling ix = dictkeys_get_index(dk, i); if (ix >= 0) { - ix = check_lookup(mp, dk, ep0, ix, key, hash); - if (ix != DKIX_DUMMY) { + Py_ssize_t cmp = check_lookup(mp, dk, ep0, ix, key, hash); + if (cmp < 0) { + return cmp; + } else if (cmp) { return ix; } } @@ -917,9 +918,9 @@ do_lookup(PyDictObject *mp, PyDictKeysObject *dk, PyObject *key, Py_hash_t hash, Py_UNREACHABLE(); } -static inline Py_ALWAYS_INLINE -Py_ssize_t compare_unicode_generic(PyDictObject *mp, PyDictKeysObject *dk, - void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) +static inline Py_ALWAYS_INLINE Py_ssize_t +compare_unicode_generic(PyDictObject *mp, PyDictKeysObject *dk, + void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) { PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix]; assert(ep->me_key != NULL); @@ -936,53 +937,51 @@ Py_ssize_t compare_unicode_generic(PyDictObject *mp, PyDictKeysObject *dk, return DKIX_ERROR; } if (dk == mp->ma_keys && ep->me_key == startkey) { - if (cmp > 0) { - return ix; - } + return cmp; } else { /* The dict was mutated, restart */ return DKIX_KEY_CHANGED; } } - return DKIX_DUMMY; + return 0; } // Search non-Unicode key from Unicode table static Py_ssize_t unicodekeys_lookup_generic(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) { - return do_lookup(mp, dk, key, hash, (generic_entries_func)DK_UNICODE_ENTRIES, compare_unicode_generic); + return do_lookup(mp, dk, key, hash, compare_unicode_generic); } -static inline Py_ALWAYS_INLINE -Py_ssize_t compare_unicode_unicode(PyDictObject *mp, PyDictKeysObject *dk, - void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) +static inline Py_ALWAYS_INLINE Py_ssize_t +compare_unicode_unicode(PyDictObject *mp, PyDictKeysObject *dk, + void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) { PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix]; assert(ep->me_key != NULL); assert(PyUnicode_CheckExact(ep->me_key)); if (ep->me_key == key || (unicode_get_hash(ep->me_key) == hash && unicode_eq(ep->me_key, key))) { - return ix; + return 1; } - return DKIX_DUMMY; + return 0; } static Py_ssize_t _Py_HOT_FUNCTION unicodekeys_lookup_unicode(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) { - return do_lookup(NULL, dk, key, hash, (generic_entries_func)DK_UNICODE_ENTRIES, compare_unicode_unicode); + return do_lookup(NULL, dk, key, hash, compare_unicode_unicode); } -static inline Py_ALWAYS_INLINE -Py_ssize_t compare_generic(PyDictObject *mp, PyDictKeysObject *dk, - void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) +static inline Py_ALWAYS_INLINE Py_ssize_t +compare_generic(PyDictObject *mp, PyDictKeysObject *dk, + void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) { PyDictKeyEntry *ep = &((PyDictKeyEntry *)ep0)[ix]; assert(ep->me_key != NULL); if (ep->me_key == key) { - return ix; + return 1; } if (ep->me_hash == hash) { PyObject *startkey = ep->me_key; @@ -993,22 +992,20 @@ Py_ssize_t compare_generic(PyDictObject *mp, PyDictKeysObject *dk, return DKIX_ERROR; } if (dk == mp->ma_keys && ep->me_key == startkey) { - if (cmp > 0) { - return ix; - } + return cmp; } else { /* The dict was mutated, restart */ return DKIX_KEY_CHANGED; } } - return DKIX_DUMMY; + return 0; } static Py_ssize_t dictkeys_generic_lookup(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) { - return do_lookup(mp, dk, key, hash, (generic_entries_func)DK_ENTRIES, compare_generic); + return do_lookup(mp, dk, key, hash, compare_generic); } /* Lookup a string in a (all unicode) dict keys.