From bb9166b67c5cd77efa411ec8679b609ea0886e28 Mon Sep 17 00:00:00 2001 From: Lautaro Javier Fernandez Pricco Date: Sat, 25 Apr 2026 18:25:39 +0200 Subject: [PATCH] gh-133672: Allow LOAD_FAST to be optimized to LOAD_FAST_BORROW Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Lib/test/test_dis.py | 4 +- Lib/test/test_peepholer.py | 93 ++++ ...-05-09-00-06-52.gh-issue-133672.y6QwwH.rst | 1 + Python/flowgraph.c | 455 +++++++++++++++++- 4 files changed, 542 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-09-00-06-52.gh-issue-133672.y6QwwH.rst diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index cc9bc918b616e2..6ca326569f4e19 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -542,7 +542,7 @@ def _with(c): dis_with = """\ %4d RESUME 0 -%4d LOAD_FAST_BORROW 0 (c) +%4d LOAD_FAST 0 (c) COPY 1 LOAD_SPECIAL 1 (__exit__) SWAP 2 @@ -1922,7 +1922,7 @@ def _prepare_test_cases(): make_inst(opname='LOAD_SMALL_INT', arg=0, argval=0, argrepr='', offset=244, start_offset=244, starts_line=False, line_number=21), make_inst(opname='BINARY_OP', arg=11, argval=11, argrepr='/', offset=246, start_offset=246, starts_line=False, line_number=21, cache_info=[('counter', 1, b'\x00\x00'), ('descr', 4, b'\x00\x00\x00\x00\x00\x00\x00\x00')]), make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=258, start_offset=258, starts_line=False, line_number=21), - make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=260, start_offset=260, starts_line=True, line_number=25), + make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=260, start_offset=260, starts_line=True, line_number=25), make_inst(opname='COPY', arg=1, argval=1, argrepr='', offset=262, start_offset=262, starts_line=False, line_number=25), make_inst(opname='LOAD_SPECIAL', arg=1, argval=1, argrepr='__exit__', offset=264, start_offset=264, starts_line=False, line_number=25), make_inst(opname='SWAP', 
arg=2, argval=2, argrepr='', offset=266, start_offset=266, starts_line=False, line_number=25), diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index abb071451d8b59..bf442fe1085557 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -2598,6 +2598,36 @@ def test_for_iter(self): ("LOAD_CONST", 0, 7), ("RETURN_VALUE", None, 8), ] + expected = [ + ("LOAD_FAST_BORROW", 0, 1), + top := self.Label(), + ("FOR_ITER", end := self.Label(), 2), + ("STORE_FAST", 2, 3), + ("JUMP", top, 4), + end, + ("END_FOR", None, 5), + ("POP_TOP", None, 6), + ("LOAD_CONST", 0, 7), + ("RETURN_VALUE", None, 8), + ] + self.cfg_optimization_test(insts, expected, consts=[None]) + + def test_for_iter_checks_fallthrough(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("GET_ITER", 0, 2), + top := self.Label(), + ("FOR_ITER", end := self.Label(), 3), + ("STORE_FAST", 1, 4), + ("LOAD_CONST", 0, 5), + ("STORE_FAST", 0, 6), + ("JUMP", top, 7), + end, + ("END_FOR", None, 8), + ("POP_ITER", None, 9), + ("LOAD_CONST", 0, 10), + ("RETURN_VALUE", None, 11), + ] self.cfg_optimization_test(insts, insts, consts=[None]) def test_load_attr(self): @@ -2675,6 +2705,69 @@ def test_send(self): ] self.cfg_optimization_test(insts, expected, consts=[None]) + def test_borrow_checks_exception_handler_edges(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("JUMP", start := self.Label(), 2), + start, + ("SETUP_FINALLY", handler := self.Label(), 3), + ("LOAD_CONST", 0, 4), + ("BINARY_OP", 0, 5), + ("POP_BLOCK", None, 6), + ("RETURN_VALUE", None, 7), + handler, + ("STORE_FAST", 1, 8), + ("LOAD_CONST", 0, 9), + ("STORE_FAST", 0, 10), + ("POP_TOP", None, 11), + ("LOAD_CONST", 0, 12), + ("RETURN_VALUE", None, 13), + ] + expected = [ + ("LOAD_FAST", 0, 1), + ("NOP", None, 2), + ("SETUP_FINALLY", handler := self.Label(), 3), + ("LOAD_CONST", 0, 4), + ("BINARY_OP", 0, 5), + ("NOP", None, 6), + ("RETURN_VALUE", None, 7), + handler, + ("STORE_FAST", 1, 8), + ("LOAD_CONST", 0, 9), + ("STORE_FAST", 0, 
10), + ("POP_TOP", None, 11), + ("LOAD_CONST", 0, 12), + ("RETURN_VALUE", None, 13), + ] + self.cfg_optimization_test(insts, expected, consts=[None]) + + def test_copied_reference_is_not_borrowed(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("COPY", 1, 1), + ("TO_BOOL", None, 1), + ("POP_JUMP_IF_TRUE", target := self.Label(), 1), + ("POP_TOP", None, 1), + ("LOAD_FAST", 1, 1), + target, + ("STORE_FAST", 0, 1), + ("LOAD_CONST", 0, 2), + ("RETURN_VALUE", None, 2), + ] + expected = [ + ("LOAD_FAST", 0, 1), + ("COPY", 1, 1), + ("TO_BOOL", None, 1), + ("POP_JUMP_IF_TRUE", target := self.Label(), 1), + ("POP_TOP", None, 1), + ("LOAD_FAST", 1, 1), + target, + ("STORE_FAST", 0, 1), + ("LOAD_CONST", 0, 2), + ("RETURN_VALUE", None, 2), + ] + self.cfg_optimization_test(insts, expected, consts=[None]) + def test_format_simple(self): # FORMAT_SIMPLE will leave its operand on the stack if it's a unicode # object. We treat it conservatively and assume that it always leaves diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-09-00-06-52.gh-issue-133672.y6QwwH.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-09-00-06-52.gh-issue-133672.y6QwwH.rst new file mode 100644 index 00000000000000..63d014e4329f8c --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-09-00-06-52.gh-issue-133672.y6QwwH.rst @@ -0,0 +1 @@ +The peephole optimizer now converts LOAD_FAST to LOAD_FAST_BORROW more aggressively. This optimization is applied even if the loaded variable remains live on the stack at the end of a basic block, as long as the borrow is otherwise determined to be safe. This can reduce reference counting overhead in certain code patterns, such as with iterables in loops. 
diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 202e3bacf2e1bf..3e84666b49ce11 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -71,6 +71,9 @@ typedef struct _PyCfgBasicblock { unsigned b_cold : 1; /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */ unsigned b_warm : 1; + /* Used by the borrow-safe analysis to mark visited block states. */ + int b_dfs_visited_gen; + Py_ssize_t b_dfs_visited_depth; } basicblock; @@ -2696,6 +2699,8 @@ typedef enum { STORED_AS_LOCAL = 2, // The loaded reference is still on the stack at the end of the basic block REF_UNCONSUMED = 4, + // The loaded reference is duplicated on the stack + REF_COPIED = 8, } LoadFastInstrFlag; static void @@ -2730,6 +2735,354 @@ load_fast_push_block(basicblock ***sp, basicblock *target, } } +static bool +is_borrow_safe( + basicblock *current_block, + Py_ssize_t depth_of_value_on_entry, + int original_local_idx, + cfg_builder *g, + Py_ssize_t stack_depth_on_entry, + int dfs_generation); + +static bool +is_exception_path_borrow_safe( + cfg_instr *instr, + Py_ssize_t current_target_depth, + Py_ssize_t current_stack_depth, + int original_local_idx, + cfg_builder *g, + int dfs_generation) +{ + basicblock *handler = instr->i_except; + if (handler == NULL) { + return true; + } + + Py_ssize_t unwind_depth = handler->b_startdepth - 1; + if (handler->b_preserve_lasti) { + unwind_depth--; + } + if (unwind_depth < 0) { + return false; + } + + Py_ssize_t value_stack_index = current_stack_depth - current_target_depth - 1; + if (value_stack_index < 0) { + return false; + } + if (value_stack_index >= unwind_depth) { + return true; + } + + Py_ssize_t handler_entry_depth = handler->b_startdepth - value_stack_index - 1; + return is_borrow_safe( + handler, handler_entry_depth, original_local_idx, g, + handler->b_startdepth, dfs_generation); +} + +/* + * Recursively determine if a borrowed reference remains safe along all CFG paths. 
+ * + * This function performs a DFS starting from 'current_block'. + * It tracks a specific value that was notionally loaded by a LOAD_FAST_BORROW + * instruction and is currently at 'depth_of_value_on_entry' on the operand stack + * (0 being TOS, -1 if already known to be consumed). The 'original_local_idx' + * is the index of the local variable from which this value was borrowed. + * + * A borrow is considered UNSAFE if, along any execution path before the + * borrowed value is consumed from the stack: + * 1. The 'original_local_idx' (the backing store for the borrow) is overwritten + * (e.g., by STORE_FAST, DELETE_FAST on that local). + * 2. The borrowed value itself, when at the top of the stack (depth 0), is + * consumed by an instruction that stores it into any local variable + * (e.g., STORE_FAST). + * 3. The value is not consumed and the CFG path ends (e.g., end of function + * without consumption) or enters a cycle where it's not consumed. + * + * The function uses the current DFS generation in conjunction with + * 'current_block->b_dfs_visited_gen' and 'b_dfs_visited_depth' to detect + * cycles within a specific DFS traversal. Revisiting the same block with the + * same tracked-value depth is safe because the current traversal already + * checked the loop body for local kills. + * + * Stack depths are tracked by: + * - 'depth_of_value_on_entry': The depth of the tracked item upon entering 'current_block'. + * - 'current_target_depth': The depth of the item as instructions in 'current_block' are processed. + * - 'depth_before_jump_op_effect': When a jump instruction is encountered, this + * is calculated by simulating all prior instructions in 'current_block' to find + * the item's depth *just before* the jump instruction itself has any stack effect. + * This precise depth is then used to calculate the 'target_entry_depth' for the + * recursive call to the jump's target block. 
+ * - 'stack_depth_on_entry': The total operand stack depth upon entering + * 'current_block'. This is used to determine whether exception unwinding + * pops the tracked item or leaves it live in the exception handler. + * + */ +static bool +is_borrow_safe( + basicblock *current_block, + Py_ssize_t depth_of_value_on_entry, + int original_local_idx, + cfg_builder *g, + Py_ssize_t stack_depth_on_entry, + int dfs_generation) +{ + if (depth_of_value_on_entry == -1) { + return true; + } + + if (current_block->b_dfs_visited_gen == dfs_generation) { + return current_block->b_dfs_visited_depth == depth_of_value_on_entry; + } + current_block->b_dfs_visited_gen = dfs_generation; + current_block->b_dfs_visited_depth = depth_of_value_on_entry; + + Py_ssize_t current_target_depth = depth_of_value_on_entry; + Py_ssize_t current_stack_depth = stack_depth_on_entry; + + for (int i = 0; i < current_block->b_iused; i++) { + cfg_instr *instr = &current_block->b_instr[i]; + int opcode = instr->i_opcode; + int oparg = instr->i_oparg; + + if ((opcode == STORE_FAST || opcode == DELETE_FAST || opcode == LOAD_FAST_AND_CLEAR) && + oparg == original_local_idx) { + return false; + } + + if (!is_exception_path_borrow_safe( + instr, current_target_depth, current_stack_depth, + original_local_idx, g, dfs_generation)) + { + return false; + } + + stack_effects effects_noj; + if (get_stack_effects(opcode, oparg, 0, &effects_noj) < 0) { + return false; + } + int num_popped = _PyOpcode_num_popped(opcode, oparg); + int num_pushed = _PyOpcode_num_pushed(opcode, oparg); + + if (opcode == FOR_ITER && current_target_depth < num_popped) { + if (current_block->b_next && BB_HAS_FALLTHROUGH(current_block)) { + Py_ssize_t fallthrough_depth = current_target_depth - + num_popped + num_pushed; + Py_ssize_t fallthrough_stack_depth = current_stack_depth - + num_popped + num_pushed; + return is_borrow_safe( + current_block->b_next, fallthrough_depth, original_local_idx, + g, fallthrough_stack_depth, dfs_generation); + } + 
return true; + } + + if (current_target_depth < num_popped) { + if (opcode == STORE_FAST && current_target_depth == 0) { + return false; + } + return true; + } + current_target_depth = current_target_depth - num_popped + num_pushed; + current_stack_depth = current_stack_depth - num_popped + num_pushed; + + if (HAS_TARGET(opcode)) { + if (i != current_block->b_iused - 1) { + continue; // Skip jumps that are not the last instruction in the block. + } + bool safe_on_all_branches = true; + + Py_ssize_t depth_before_jump_op_effect = depth_of_value_on_entry; + Py_ssize_t stack_depth_before_jump_op_effect = stack_depth_on_entry; + for(int k=0; k < i; ++k) { + cfg_instr *prev_instr_in_block = &current_block->b_instr[k]; + // Kill check for intermediate instructions + if ((prev_instr_in_block->i_opcode == STORE_FAST || + prev_instr_in_block->i_opcode == DELETE_FAST || + prev_instr_in_block->i_opcode == LOAD_FAST_AND_CLEAR) && + prev_instr_in_block->i_oparg == original_local_idx) { + return false; + } + stack_effects prev_effects; + if (get_stack_effects(prev_instr_in_block->i_opcode, prev_instr_in_block->i_oparg, 0, &prev_effects) < 0) { + return false; + } + int prev_popped_val = _PyOpcode_num_popped(prev_instr_in_block->i_opcode, prev_instr_in_block->i_oparg); + if (depth_before_jump_op_effect < prev_popped_val) { + if (prev_instr_in_block->i_opcode == STORE_FAST && depth_before_jump_op_effect == 0) { + return false; + } + return true; + } + depth_before_jump_op_effect = depth_before_jump_op_effect - prev_popped_val + + _PyOpcode_num_pushed(prev_instr_in_block->i_opcode, prev_instr_in_block->i_oparg); + stack_depth_before_jump_op_effect = stack_depth_before_jump_op_effect - prev_popped_val + + _PyOpcode_num_pushed(prev_instr_in_block->i_opcode, prev_instr_in_block->i_oparg); + } + + // Analyze jump target path + stack_effects effects_jump; + if (get_stack_effects(opcode, oparg, 1, &effects_jump) < 0) return false; + int jump_popped_val = _PyOpcode_num_popped(opcode, oparg); + 
Py_ssize_t target_entry_depth = depth_before_jump_op_effect; + + if (target_entry_depth < jump_popped_val) { + if (opcode == STORE_FAST && target_entry_depth == 0) { + safe_on_all_branches = false; + } + } else { // Not consumed by jump op, recurse on target. + target_entry_depth = target_entry_depth - jump_popped_val + _PyOpcode_num_pushed(opcode, oparg); + Py_ssize_t target_entry_stack_depth = stack_depth_before_jump_op_effect - + jump_popped_val + + _PyOpcode_num_pushed(opcode, oparg); + if (!is_borrow_safe( + instr->i_target, target_entry_depth, original_local_idx, g, + target_entry_stack_depth, dfs_generation)) + { + safe_on_all_branches = false; + } + } + + // Analyze fallthrough path, if the jump path was safe. + if (safe_on_all_branches && current_block->b_next && + BB_HAS_FALLTHROUGH(current_block)) + { + if (!is_borrow_safe( + current_block->b_next, current_target_depth, original_local_idx, g, + current_stack_depth, dfs_generation)) + { + safe_on_all_branches = false; + } + } + return safe_on_all_branches; + } + } + + // When the instructions finish and the value isn't consumed, check fallthrough. + if (current_block->b_next && BB_HAS_FALLTHROUGH(current_block)) { + return is_borrow_safe( + current_block->b_next, current_target_depth, original_local_idx, g, + current_stack_depth, dfs_generation); + } + + /* + No further path (or no fallthrough) and value is still on stack (current_target_depth is valid). + This means it's unconsumed at the end of a CFG path, so it's unsafe. + */ + return false; +} + + +/* + * Initiates a CFG wide safety check for a borrowed reference. + * + * This function is called when a LOAD_FAST instruction is a candidate for + * LOAD_FAST_BORROW, but its produced value ('REF_UNCONSUMED' flag is set) + * might live beyond the current basic block ('producer_block'). 
+ * + * It determines the immediate successor(s) of the 'producer_block' and the + * stack depth at which the borrowed value (identified by 'original_local_idx' + * and initially at 'depth_of_value_at_producer_end' from TOS) would enter + * these successors. + * + * It then calls 'is_borrow_safe()' for each successor path. The borrow is + * considered globally safe only if 'is_borrow_safe()' returns true for ALL + * possible successor paths. + * + * A new DFS traversal is started by incrementing '*dfs_generation_counter' to + * ensure that 'is_borrow_safe()' uses a fresh set of visited markers + * ('b_dfs_visited_gen') for its analysis. + * + */ +static bool +check_borrow_safety_globally( + basicblock *producer_block, + Py_ssize_t depth_of_value_at_producer_end, + int original_local_idx, + Py_ssize_t stack_depth_at_producer_end, + cfg_builder *g, + int *dfs_generation_counter) +{ + int dfs_generation = ++*dfs_generation_counter; + bool overall_safety = true; + + cfg_instr *last_instr = basicblock_last_instr(producer_block); + + // If depth is -1, implies value was already consumed or is invalid for tracking. 
+ if (depth_of_value_at_producer_end == -1) { + return false; + } + + if (last_instr && HAS_TARGET(last_instr->i_opcode)) { + stack_effects effects_noj; + if (get_stack_effects(last_instr->i_opcode, last_instr->i_oparg, 0, &effects_noj) < 0) { + return false; + } + int fall_popped = _PyOpcode_num_popped(last_instr->i_opcode, last_instr->i_oparg); + int fall_pushed = _PyOpcode_num_pushed(last_instr->i_opcode, last_instr->i_oparg); + Py_ssize_t depth_before_jump = depth_of_value_at_producer_end + fall_popped - fall_pushed; + Py_ssize_t stack_depth_before_jump = stack_depth_at_producer_end + fall_popped - fall_pushed; + + stack_effects effects_jump; + if (get_stack_effects(last_instr->i_opcode, last_instr->i_oparg, 1, &effects_jump) < 0) { + return false; + } + int jump_popped = _PyOpcode_num_popped(last_instr->i_opcode, last_instr->i_oparg); + Py_ssize_t entry_depth_for_target = depth_before_jump; + + if (entry_depth_for_target < jump_popped) { + if (last_instr->i_opcode == STORE_FAST && entry_depth_for_target == 0) { + overall_safety = false; + } + } else { + entry_depth_for_target = entry_depth_for_target - jump_popped + + _PyOpcode_num_pushed(last_instr->i_opcode, last_instr->i_oparg); + Py_ssize_t entry_stack_depth_for_target = stack_depth_before_jump - + jump_popped + + _PyOpcode_num_pushed(last_instr->i_opcode, last_instr->i_oparg); + if (!is_borrow_safe( + last_instr->i_target, entry_depth_for_target, original_local_idx, g, + entry_stack_depth_for_target, dfs_generation)) + { + overall_safety = false; + } + } + + // Analyze fallthrough path, if the jump path was safe. + if (overall_safety && producer_block->b_next && + BB_HAS_FALLTHROUGH(producer_block)) + { + if (!is_borrow_safe( + producer_block->b_next, depth_of_value_at_producer_end, original_local_idx, g, + stack_depth_at_producer_end, dfs_generation)) + { + overall_safety = false; + } + } + } else if (producer_block->b_next && BB_HAS_FALLTHROUGH(producer_block)) { // Standard fallthrough, no jump. 
+ if (!is_borrow_safe( + producer_block->b_next, depth_of_value_at_producer_end, original_local_idx, g, + stack_depth_at_producer_end, dfs_generation)) + { + overall_safety = false; + } + } else { + overall_safety = false; + } + return overall_safety; +} + +static bool +has_nonterminal_target_after(basicblock *block, int instr_idx) +{ + for (int i = instr_idx + 1; i < block->b_iused - 1; i++) { + if (HAS_TARGET(block->b_instr[i].i_opcode)) { + return true; + } + } + return false; +} + /* * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that * load borrowed references onto the operand stack. @@ -2773,9 +3126,12 @@ optimize_load_fast(cfg_builder *g) int status; ref_stack refs = {0}; int max_instrs = 0; + int dfs_generation_counter = 0; basicblock *entryblock = g->g_entryblock; for (basicblock *b = entryblock; b != NULL; b = b->b_next) { max_instrs = Py_MAX(max_instrs, b->b_iused); + b->b_dfs_visited_gen = 0; + b->b_dfs_visited_depth = -1; } size_t instr_flags_size = max_instrs * sizeof(uint8_t); uint8_t *instr_flags = PyMem_Malloc(instr_flags_size); @@ -2877,6 +3233,9 @@ optimize_load_fast(cfg_builder *g) Py_ssize_t idx = refs.size - oparg; ref r = ref_stack_at(&refs, idx); PUSH_REF(r.instr, r.local); + if (r.instr != DUMMY_INSTR) { + instr_flags[r.instr] |= REF_COPIED; + } break; } @@ -3031,17 +3390,95 @@ optimize_load_fast(cfg_builder *g) // Optimize instructions for (int i = 0; i < block->b_iused; i++) { - if (!instr_flags[i]) { - cfg_instr *instr = &block->b_instr[i]; - switch (instr->i_opcode) { - case LOAD_FAST: + cfg_instr *instr = &block->b_instr[i]; + bool is_load_fast_type = (instr->i_opcode == LOAD_FAST || instr->i_opcode == LOAD_FAST_LOAD_FAST); + + if (is_load_fast_type) { + bool killed_or_stored_locally = (instr_flags[i] & (SUPPORT_KILLED | STORED_AS_LOCAL)); + if (killed_or_stored_locally) { + continue; // Definitely cannot borrow due to local block issues + } + if (instr_flags[i] & REF_COPIED) { + continue; + } + + bool 
unconsumed_in_block = (instr_flags[i] & REF_UNCONSUMED); + bool can_borrow = false; + + if (!unconsumed_in_block) { + can_borrow = true; // Safe by local analysis, consumed in current block + } else if (has_nonterminal_target_after(block, i)) { + can_borrow = false; + } else { + if (instr->i_opcode == LOAD_FAST) { + int local_idx = instr->i_oparg; + Py_ssize_t depth_from_tos_at_block_end = -1; + // Find the specific item on the `refs` stack (at end of current block simulation) + for (Py_ssize_t k = 0; k < refs.size; k++) { + ref r = ref_stack_at(&refs, k); + if (r.instr == i && r.local == local_idx) { // Match instruction and local + depth_from_tos_at_block_end = refs.size - 1 - k; + break; + } + } + + if (depth_from_tos_at_block_end != -1) { + can_borrow = check_borrow_safety_globally( + block, depth_from_tos_at_block_end, local_idx, + refs.size, g, &dfs_generation_counter); + } else { + // If REF_UNCONSUMED is set but we couldn't find its depth, assume unsafe. + // This can happen if refs.size is 0, yet flag is set. 
+ can_borrow = false; + } + + } else { // LOAD_FAST_LOAD_FAST + can_borrow = true; // Assume true, prove false if any part is unsafe + int local_idx1 = instr->i_oparg >> 4; + int local_idx2 = instr->i_oparg & 15; + Py_ssize_t depth1 = -1, depth2 = -1; + + // Find depths on `refs` stack for both products of this LFLF instruction `i` + for (Py_ssize_t k = 0; k < refs.size; k++) { + ref r = ref_stack_at(&refs, k); + if (r.instr == i) { + Py_ssize_t current_depth = refs.size - 1 - k; + if (r.local == local_idx1 && depth1 == -1) depth1 = current_depth; + else if (r.local == local_idx2 && depth2 == -1) depth2 = current_depth; + } + } + + bool found_any_on_stack = (depth1 != -1 || depth2 != -1); + + if (depth1 != -1) { + if (!check_borrow_safety_globally( + block, depth1, local_idx1, refs.size, g, + &dfs_generation_counter)) + { + can_borrow = false; + } + } + if (can_borrow && depth2 != -1) { + if (!check_borrow_safety_globally( + block, depth2, local_idx2, refs.size, g, + &dfs_generation_counter)) + { + can_borrow = false; + } + } + + if (unconsumed_in_block && !found_any_on_stack) { + can_borrow = false; + } + } + } + + if (can_borrow) { + if (instr->i_opcode == LOAD_FAST) { instr->i_opcode = LOAD_FAST_BORROW; - break; - case LOAD_FAST_LOAD_FAST: + } else if (instr->i_opcode == LOAD_FAST_LOAD_FAST) { instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW; - break; - default: - break; + } } } }