From 1e2ce9cef628acb8e83d98a182ffd6c1e9cf3eaa Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Mon, 21 Feb 2022 20:11:27 -0500 Subject: [PATCH 1/6] Implement LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE --- Include/opcode.h | 1 + Lib/opcode.py | 1 + Python/ceval.c | 83 +++++++++++++++++++++++++++++++++++++++-- Python/opcode_targets.h | 2 +- Python/specialize.c | 10 +++++ 5 files changed, 93 insertions(+), 4 deletions(-) diff --git a/Include/opcode.h b/Include/opcode.h index df93a93fbbd989..1179c207dafaae 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -178,6 +178,7 @@ extern "C" { #define LOAD_FAST__LOAD_CONST 161 #define LOAD_CONST__LOAD_FAST 167 #define STORE_FAST__STORE_FAST 168 +#define LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE 169 #define DO_TRACING 255 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { diff --git a/Lib/opcode.py b/Lib/opcode.py index 95792459c377a1..5b8ee0f13180cf 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -292,6 +292,7 @@ def jabs_op(name, op): "LOAD_FAST__LOAD_CONST", "LOAD_CONST__LOAD_FAST", "STORE_FAST__STORE_FAST", + "LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE", ] _specialization_stats = [ "success", diff --git a/Python/ceval.c b/Python/ceval.c index 471bbde46f9db0..d94aafc3a9117a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1770,6 +1770,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr } TARGET(LOAD_FAST) { + PREDICTED(LOAD_FAST); PyObject *value = GETLOCAL(oparg); if (value == NULL) { goto unbound_local_error; @@ -3446,6 +3447,35 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr } } + TARGET(LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE) { + assert(cframe.use_tracing == 0); + PyObject *owner = GETLOCAL(oparg); // borrowed + if (owner == NULL) { + goto unbound_local_error; + } + // GET_CACHE(), but for the following opcode + assert(_Py_OPCODE(*next_instr) == LOAD_ATTR_INSTANCE_VALUE); + SpecializedCacheEntry *caches = _GetSpecializedCacheEntryForInstruction( + first_instr, INSTR_OFFSET() + 1, _Py_OPARG(*next_instr)); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + _PyAttrCache *cache1 = &caches[-1].attr; + assert(cache1->tp_version != 0); + PyTypeObject *tp = Py_TYPE(owner); + // Note: these DEOPT_IFs jump back to LOAD_FAST. + DEOPT_IF(tp->tp_version_tag != cache1->tp_version, + LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE); + assert(tp->tp_dictoffset < 0); + assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); + PyDictValues *values = *_PyObject_ValuesPointer(owner); + DEOPT_IF(values == NULL, LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE); + PyObject *res = values->values[cache0->index]; + DEOPT_IF(res == NULL, LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE); + STAT_INC(LOAD_ATTR, hit); + PUSH(Py_NewRef(res)); + next_instr++; + NOTRACE_DISPATCH(); + } + TARGET(LOAD_ATTR_INSTANCE_VALUE) { assert(cframe.use_tracing == 0); PyObject *owner = TOP(); @@ -3455,13 +3485,13 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr _PyAdaptiveEntry *cache0 = &caches[0].adaptive; _PyAttrCache *cache1 = &caches[-1].attr; assert(cache1->tp_version != 0); - DEOPT_IF(tp->tp_version_tag != cache1->tp_version, LOAD_ATTR); + DEOPT_IF(tp->tp_version_tag != cache1->tp_version, LOAD_ATTR_INSTANCE_VALUE); assert(tp->tp_dictoffset < 0); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictValues *values = *_PyObject_ValuesPointer(owner); - DEOPT_IF(values == NULL, LOAD_ATTR); + DEOPT_IF(values == NULL, LOAD_ATTR_INSTANCE_VALUE); res = values->values[cache0->index]; - DEOPT_IF(res == NULL, LOAD_ATTR); + DEOPT_IF(res == NULL, LOAD_ATTR_INSTANCE_VALUE); STAT_INC(LOAD_ATTR, hit); Py_INCREF(res); SET_TOP(res); @@ -5430,6 +5460,53 @@ MISS_WITH_CACHE(BINARY_SUBSCR) MISS_WITH_CACHE(UNPACK_SEQUENCE) MISS_WITH_OPARG_COUNTER(STORE_SUBSCR) +LOAD_ATTR_INSTANCE_VALUE_miss: + { + // Special-cased so that if LOAD_ATTR_INSTANCE_VALUE + // gets replaced, then any preceeding + // LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE gets replaced as well + STAT_INC(LOAD_ATTR_INSTANCE_VALUE, miss); + STAT_INC(LOAD_ATTR, miss); + _PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; + cache->counter--; + if (cache->counter == 0) { + next_instr[-1] = _Py_MAKECODEUNIT(LOAD_ATTR_ADAPTIVE, _Py_OPARG(next_instr[-1])); + if (_Py_OPCODE(next_instr[-2]) == LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE) { + next_instr[-2] = _Py_MAKECODEUNIT(LOAD_FAST, _Py_OPARG(next_instr[-2])); + if (_Py_OPCODE(next_instr[-3]) == LOAD_FAST) { + next_instr[-3] = _Py_MAKECODEUNIT(LOAD_FAST__LOAD_FAST, _Py_OPARG(next_instr[-3])); + } + } + STAT_INC(opname, deopt); + cache_backoff(cache); + } + oparg = cache->original_oparg; + JUMP_TO_INSTRUCTION(LOAD_ATTR); + } + +LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE_miss: + { + // Special-cased because the standard opcode does not push + // "owner" to the stack, but we need to push it if we DEOPT. + STAT_INC(LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE, miss); + STAT_INC(LOAD_ATTR, miss); + SpecializedCacheEntry *caches = _GetSpecializedCacheEntryForInstruction( + first_instr, INSTR_OFFSET() + 1, _Py_OPARG(*next_instr)); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + cache0->counter--; + if (cache0->counter == 0) { + assert(_Py_OPCODE(next_instr[0]) == LOAD_ATTR_INSTANCE_VALUE); + next_instr[0] = _Py_MAKECODEUNIT(LOAD_ATTR_ADAPTIVE, _Py_OPARG(next_instr[0])); + assert(_Py_OPCODE(next_instr[-1]) == LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE); + next_instr[-1] = _Py_MAKECODEUNIT(LOAD_FAST, _Py_OPARG(next_instr[-1])); + if (_Py_OPCODE(next_instr[-2]) == LOAD_FAST) { + next_instr[-2] = _Py_MAKECODEUNIT(LOAD_FAST__LOAD_FAST, _Py_OPARG(next_instr[-2])); + } + STAT_INC(LOAD_ATTR, deopt); + } + JUMP_TO_INSTRUCTION(LOAD_FAST); + } + binary_subscr_dict_error: { PyObject *sub = POP(); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index f6cbec75089e5e..dc7e116d21827a 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -168,7 +168,7 @@ static void *opcode_targets[256] = { &&TARGET_PRECALL, &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_STORE_FAST__STORE_FAST, - &&_unknown_opcode, + &&TARGET_LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE, &&_unknown_opcode, &&TARGET_CALL, &&TARGET_KW_NAMES, diff --git a/Python/specialize.c b/Python/specialize.c index 7dd12c7de2aeb6..53ffcc53266a13 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -889,6 +889,16 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp return -1; } if (err) { + if (_Py_OPCODE(instr[0]) == LOAD_ATTR_INSTANCE_VALUE) { + // Note: instr[-1] exists because there's something on the stack, + // and instr[-2] exists because there's at least a RESUME as well. + if (_Py_OPCODE(instr[-1]) == LOAD_FAST) { + instr[-1] = _Py_MAKECODEUNIT(LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE, _Py_OPARG(instr[-1])); + if (_Py_OPCODE(instr[-2]) == LOAD_FAST__LOAD_FAST) { + instr[-2] = _Py_MAKECODEUNIT(LOAD_FAST, _Py_OPARG(instr[-2])); + } + } + } goto success; } fail: From dd3e1438d709c4896d714197c2d5dbc1205b7083 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 22 Feb 2022 05:14:28 +0000 Subject: [PATCH 2/6] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Core and Builtins/2022-02-22-05-14-25.bpo-46823.z9NZC9.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-02-22-05-14-25.bpo-46823.z9NZC9.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-02-22-05-14-25.bpo-46823.z9NZC9.rst b/Misc/NEWS.d/next/Core and Builtins/2022-02-22-05-14-25.bpo-46823.z9NZC9.rst new file mode 100644 index 00000000000000..908f48d33f285f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-02-22-05-14-25.bpo-46823.z9NZC9.rst @@ -0,0 +1 @@ +Implement a specialized combined opcode ``LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE``. Patch by Dennis Sweeney. From b6e0b7b7d5cc1494c3cf8c750b95e4811112530e Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Tue, 22 Feb 2022 00:44:29 -0500 Subject: [PATCH 3/6] Fix compilation for Py_STATS --- Python/ceval.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/ceval.c b/Python/ceval.c index d94aafc3a9117a..c64ccede807a9d 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -5477,7 +5477,7 @@ MISS_WITH_OPARG_COUNTER(STORE_SUBSCR) next_instr[-3] = _Py_MAKECODEUNIT(LOAD_FAST__LOAD_FAST, _Py_OPARG(next_instr[-3])); } } - STAT_INC(opname, deopt); + STAT_INC(LOAD_ATTR, deopt); cache_backoff(cache); } oparg = cache->original_oparg; From ec5b71244c5e3477b5ad76009581c8385bfb8876 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Tue, 22 Feb 2022 14:48:39 -0500 Subject: [PATCH 4/6] Improve comment --- Python/ceval.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 5a045c9b31a5ac..bdab82653f6800 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -5578,8 +5578,8 @@ MISS_WITH_OPARG_COUNTER(STORE_SUBSCR) LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE_miss: { - // Special-cased because the standard opcode does not push - // "owner" to the stack, but we need to push it if we DEOPT. + // This is special-cased because we have a specialization of + // LOAD_FAST that borrows cache from the following instruction. STAT_INC(LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE, miss); STAT_INC(LOAD_ATTR, miss); SpecializedCacheEntry *caches = _GetSpecializedCacheEntryForInstruction( From 273926c93da2dd37c93a4c8d57d0babc6f0e6f59 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Thu, 24 Feb 2022 00:46:24 -0500 Subject: [PATCH 5/6] From review: do LOAD_FAST and jump to a shared miss label --- Python/ceval.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index bdab82653f6800..b93b0c659654f9 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1769,7 +1769,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr } TARGET(LOAD_FAST) { - PREDICTED(LOAD_FAST); PyObject *value = GETLOCAL(oparg); if (value == NULL) { goto unbound_local_error; @@ -5578,25 +5577,24 @@ MISS_WITH_OPARG_COUNTER(STORE_SUBSCR) LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE_miss: { - // This is special-cased because we have a specialization of - // LOAD_FAST that borrows cache from the following instruction. - STAT_INC(LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE, miss); - STAT_INC(LOAD_ATTR, miss); - SpecializedCacheEntry *caches = _GetSpecializedCacheEntryForInstruction( - first_instr, INSTR_OFFSET() + 1, _Py_OPARG(*next_instr)); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - cache0->counter--; - if (cache0->counter == 0) { - assert(_Py_OPCODE(next_instr[0]) == LOAD_ATTR_INSTANCE_VALUE); - next_instr[0] = _Py_MAKECODEUNIT(LOAD_ATTR_ADAPTIVE, _Py_OPARG(next_instr[0])); - assert(_Py_OPCODE(next_instr[-1]) == LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE); - next_instr[-1] = _Py_MAKECODEUNIT(LOAD_FAST, _Py_OPARG(next_instr[-1])); - if (_Py_OPCODE(next_instr[-2]) == LOAD_FAST) { - next_instr[-2] = _Py_MAKECODEUNIT(LOAD_FAST__LOAD_FAST, _Py_OPARG(next_instr[-2])); - } - STAT_INC(LOAD_ATTR, deopt); + // This is special-cased because we have a superinstruction + // that includes a specialized instruction. + // If the specialized portion misses, carry out + // the first instruction, then perform a miss + // for the second instruction as usual. + + // Do LOAD_FAST + { + PyObject *value = GETLOCAL(oparg); + assert(value != NULL); // Already checked if unbound + Py_INCREF(value); + PUSH(value); + NEXTOPARG(); + next_instr++; } - JUMP_TO_INSTRUCTION(LOAD_FAST); + + // Now we are in the correct state for LOAD_ATTR + goto LOAD_ATTR_INSTANCE_VALUE_miss; } binary_subscr_dict_error: From 6450e4bc64b0cc41e48c3f0ee61c011f7e484846 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Thu, 24 Feb 2022 01:42:10 -0500 Subject: [PATCH 6/6] Update comment --- Python/ceval.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/ceval.c b/Python/ceval.c index d91fe80bdbda3f..f3bdaf13770774 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3457,7 +3457,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr _PyAdaptiveEntry *cache0 = &caches[0].adaptive; assert(cache0->version != 0); PyTypeObject *tp = Py_TYPE(owner); - // Note: these DEOPT_IFs jump back to LOAD_FAST. + // These DEOPT_IF miss branches do PUSH(Py_NewRef(owner)). DEOPT_IF(tp->tp_version_tag != cache0->version, LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE); assert(tp->tp_dictoffset < 0);