From ea7c668110da2c42f115fd74d02702f540762634 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 22 Apr 2026 11:44:06 +0200 Subject: [PATCH 1/6] perf(investigation): add JPERL_CLASSIC gate, confirm cumulative-tax hypothesis Investigation result, not a user-facing feature. Gate behind JPERL_CLASSIC env var (read once into a static final boolean at class init, JIT DCEs the branch entirely). When set, collapses the branch's added refcount / walker / weaken / DESTROY machinery to master-like behavior so we can measure whether the full master->branch regression is recoverable. Gated sites: - MortalList.active = false -> deferDecrement*, scopeExitCleanupHash/Array, mortalizeForVoidDiscard all short-circuit via existing !active guards - EmitStatement.emitScopeExitNullStores: Phase 1 (scopeExitCleanup), Phase 1b (cleanupHash/Array), Phase E (unregister), Phase 3 (flush) emissions all suppressed - EmitVariable: MyVarCleanupStack.register emission on my suppressed - MyVarCleanupStack.register/unregister: early-return - RuntimeScalar.scopeExitCleanup: early-return - RuntimeScalar.setLargeRefCounted: direct field assignment, skipping refcount/WeakRefRegistry/MortalList work Results (life_bitpacked -g 500, 5 runs, median): baseline : 8.51 Mcells/s JPERL_CLASSIC=1 : 14.18 Mcells/s (1.67x, recovers full master gap) system perl reference : ~21 Mcells/s master pre-merge ref : ~14 Mcells/s Also confirmed on benchmark_lexical: 314k -> 357k iters/s (1.14x) on a workload with zero references or blesses -- proving the taxes are broadly distributed, not localized to any one hot method. No behavior change on the default path (env var absent). destroy_eval_die.t still shows same pre-existing 9/10 (test #4 is a documented known failure). CLASSIC is not safe for production use: it breaks DESTROY, weaken, walker semantics. Only useful as a measurement tool and as a map of sites that need per-object gating for the real fix (Phase R in the plan). See dev/design/classic_experiment_finding.md for full analysis and next steps. Plan updated in dev/design/perl_parity_plan.md to retire Phase 1-4 in favor of Phase R (per-object needsCleanup bit). Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- dev/bench/results/baseline-078e0b3d7.json | 21 + dev/bench/results/baseline-078e0b3d7.md | 23 + dev/design/classic_experiment_finding.md | 102 ++++ dev/design/life_bitpacked_jfr_profile.md | 75 +++ dev/design/perl_parity_plan.md | 438 ++++++++++++++++++ .../perlonjava/backend/jvm/EmitStatement.java | 52 ++- .../perlonjava/backend/jvm/EmitVariable.java | 2 +- .../org/perlonjava/core/Configuration.java | 6 +- .../runtime/runtimetypes/MortalList.java | 16 + .../runtimetypes/MyVarCleanupStack.java | 3 +- .../runtime/runtimetypes/RuntimeScalar.java | 8 + 11 files changed, 717 insertions(+), 29 deletions(-) create mode 100644 dev/bench/results/baseline-078e0b3d7.json create mode 100644 dev/bench/results/baseline-078e0b3d7.md create mode 100644 dev/design/classic_experiment_finding.md create mode 100644 dev/design/life_bitpacked_jfr_profile.md create mode 100644 dev/design/perl_parity_plan.md diff --git a/dev/bench/results/baseline-078e0b3d7.json b/dev/bench/results/baseline-078e0b3d7.json new file mode 100644 index 000000000..b111a71cb --- /dev/null +++ b/dev/bench/results/baseline-078e0b3d7.json @@ -0,0 +1,21 @@ +{ + "git_sha": "078e0b3d7", + "date": "2026-04-21T21:17:48Z", + "runs": 3, + "jperl": "/Users/fglock/projects/PerlOnJava3/jperl", + "perl": "perl", + "perl_version": "5.042000", + "benchmarks": { + "benchmark_anon_simple": { "unit": "s", "jperl": [7.149,7.020,7.213], "perl": [1.435,1.454,1.427] }, + "benchmark_closure": { "unit": "s", "jperl": [8.784,9.783,9.768], "perl": [8.108,7.961,7.877] }, + "benchmark_eval_string": { "unit": "s", "jperl": [14.766,14.777,14.365], "perl": [3.135,3.164,3.276] }, + "benchmark_global": { "unit": "s", "jperl": [14.608,14.579,14.720], "perl": [10.993,11.063,9.400] }, + "benchmark_lexical": { "unit": "s", "jperl": [4.059,4.010,3.989], "perl": [10.589,10.581,10.441] }, + "benchmark_method": { "unit": "s", "jperl": [2.620,2.537,2.607], "perl": [1.456,1.490,1.511] }, + "benchmark_refcount_anon": { "unit": "s", "jperl": [1.792,1.807,1.776], "perl": [0.455,0.447,0.443] }, + "benchmark_refcount_bless": { "unit": "s", "jperl": [1.293,1.305,1.311], "perl": [0.197,0.198,0.197] }, + "benchmark_regex": { "unit": "s", "jperl": [2.732,2.719,2.701], "perl": [1.974,2.005,2.006] }, + "benchmark_string": { "unit": "s", "jperl": [4.131,4.025,4.066], "perl": [6.887,6.867,6.977] }, + "life_bitpacked": { "unit": "Mcells/s", "jperl": [8.21,8.12,8.28], "perl": [20.99,20.58,20.70] } + } +} diff --git a/dev/bench/results/baseline-078e0b3d7.md b/dev/bench/results/baseline-078e0b3d7.md new file mode 100644 index 000000000..fb3f4aa34 --- /dev/null +++ b/dev/bench/results/baseline-078e0b3d7.md @@ -0,0 +1,23 @@ +# Benchmark baseline — 078e0b3d7 + +**Date:** 2026-04-21T21:17:48Z +**Runs per benchmark:** 3 +**jperl:** `/Users/fglock/projects/PerlOnJava3/jperl` +**perl:** `perl` (5.042000) + +For "time" benches lower = faster; ratio is `jperl / perl`. +For "Mcells/s" (life_bitpacked) higher = faster; ratio is `perl / jperl`. + +| Benchmark | unit | jperl | perl | ratio | parity? | +|---|---|---:|---:|---:|:---:| +| `benchmark_anon_simple` | s | 7.127 | 1.439 | **4.95×** | ❌ | +| `benchmark_closure` | s | 9.445 | 7.982 | **1.18×** | ≈ | +| `benchmark_eval_string` | s | 14.636 | 3.192 | **4.59×** | ❌ | +| `benchmark_global` | s | 14.636 | 10.485 | **1.40×** | ❌ | +| `benchmark_lexical` | s | 4.019 | 10.537 | **0.38×** | ✅ | +| `benchmark_method` | s | 2.588 | 1.486 | **1.74×** | ❌ | +| `benchmark_refcount_anon` | s | 1.792 | 0.448 | **4.00×** | ❌ | +| `benchmark_refcount_bless` | s | 1.303 | 0.197 | **6.61×** | ❌ | +| `benchmark_regex` | s | 2.717 | 1.995 | **1.36×** | ❌ | +| `benchmark_string` | s | 4.074 | 6.910 | **0.59×** | ✅ | +| `life_bitpacked` | Mcells/s | 8.203 | 20.757 | **2.53×** | ❌ | diff --git a/dev/design/classic_experiment_finding.md b/dev/design/classic_experiment_finding.md new file mode 100644 index 000000000..48a8c9af1 --- /dev/null +++ b/dev/design/classic_experiment_finding.md @@ -0,0 +1,102 @@ +# JPERL_CLASSIC experiment — cumulative-tax hypothesis confirmed + +**Branch:** `perf/perl-parity-phase1` @ 3c2ca4b6a + CLASSIC gate patches (4 files) +**Date:** 2026-04-18 +**Hypothesis:** The master→branch regression (1.67× on life_bitpacked) is NOT attributable to any single hot method. It is the cumulative cost of many small taxes added by the refcount/walker/weaken/DESTROY machinery, each individually invisible in a profile. + +## Test + +Added `JPERL_CLASSIC` env var (read once at class-init into a `static final boolean`). When set, short-circuits the branch's added machinery to near-master behavior: + +| Site | CLASSIC behavior | +|---|---| +| `MortalList.active` | `false` — every `deferDecrement*` / `scopeExitCleanup{Hash,Array}` / `mortalizeForVoidDiscard` early-returns | +| `EmitStatement.emitScopeExitNullStores` Phase 1 (`scopeExitCleanup` per scalar) | Not emitted | +| `EmitStatement.emitScopeExitNullStores` Phase 1b (cleanupHash/Array) | Not emitted | +| `EmitStatement.emitScopeExitNullStores` Phase E (`MyVarCleanupStack.unregister`) | Not emitted | +| `EmitStatement.emitScopeExitNullStores` Phase 3 (`MortalList.flush`) | Not emitted | +| `EmitVariable` `MyVarCleanupStack.register` on every `my` | Not emitted | +| `MyVarCleanupStack.register` / `unregister` | Early-return | +| `RuntimeScalar.scopeExitCleanup` | Early-return | +| `RuntimeScalar.setLargeRefCounted` | Direct field assignment, skipping refcount/WeakRefRegistry/MortalList work | + +Correctness: CLASSIC breaks DESTROY, weaken, walker semantics — only useful for measurement, not shipping. + +## Result — life_bitpacked + +`./jperl examples/life_bitpacked.pl -r none -g 500`, 5 runs each, median: + +| Mode | Runs (Mcells/s) | Median | +|---|---|---:| +| Baseline (branch machinery on) | 8.58 / 8.51 / 8.49 / 8.51 / 8.45 | **8.51** | +| `JPERL_CLASSIC=1` | 14.18 / 14.60 / 14.14 / 13.32 / 13.77 | **14.18** | +| System perl (reference) | — | 20.8 – 21.5 | +| Master @ pre-merge (reference) | — | 14.0 | + +**Speedup: 14.18 / 8.51 = 1.666×**, essentially recovering master's pre-merge number. + +## Result — benchmark_lexical (simple, no refs) + +`./jperl dev/bench/benchmark_lexical.pl`, 3 runs each: + +| Mode | Runs (iters/s) | Median | +|---|---|---:| +| Baseline | 313484 / 329270 / 314172 | **314172** | +| `JPERL_CLASSIC=1` | 357144 / 347743 / 359080 | **357144** | + +**Speedup: 1.14×** + +Even on a workload with no references and no blesses, the `my`-variable register/unregister emissions and scope-exit cleanup emissions cost ~14%. + +## Interpretation + +The hypothesis is definitively confirmed: + +1. **The master→branch perf gap is recoverable in full** (1.67× on the most ref-heavy workload) by gating the added machinery. +2. **No single site is the bottleneck.** Phase 1 (MortalList.flush) alone was worth 0.7%. Phase 2's pristine-args stub alone was worth 0%. The 1.67× comes from ~a dozen sites each contributing 2–10%. +3. **The taxes are broadly distributed across the scope-exit / variable-declaration / reference-assignment paths.** Even workloads that never exercise DESTROY/weaken pay them. + +## Implication for the plan + +The piecewise Phase 2'/3'/4' approach was the wrong framing. The right structural fix: + +**Make the machinery per-object-opt-in, not always-on.** Perl 5's design: `SvREFCNT_inc` is free for most SVs because the type tag gates the work. Only objects that need refcount tracking pay the cost. + +Concrete proposal (call it Phase R — "refcount by need"): + +1. Add a single `needsCleanup` bit to `RuntimeBase`, default `false`. +2. Set it to `true` only when: + - The object is blessed into a class that has `DESTROY`, OR + - The object is targeted by `Scalar::Util::weaken`, OR + - The object is captured by a CODE ref whose refCount we need to track for cycle break. +3. Every CURRENT-BRANCH fast-path site becomes `if (!needsCleanup) return ;`: + - `setLargeRefCounted` → direct assignment if neither side needs cleanup + - `scopeExitCleanup` → no-op if scalar's value doesn't need cleanup + - `MyVarCleanupStack.register` → skip if the var's referent doesn't need cleanup + - `MortalList.deferDecrement*` → skip if referent doesn't need cleanup + - `scopeExitCleanupHash/Array` → skip if container has no needsCleanup descendants + +With per-object gating, life_bitpacked (zero blessed objects, zero weaken) pays zero tax and runs at ~14 Mc/s. DBIx::Class / txn_scope_guard / destroy_eval_die (objects that DO need cleanup) still work correctly. + +This is a **significant refactor** — every site listed above needs a cheap gate check. But: + +- The CLASSIC experiment has already implemented those gate checks (just globally rather than per-object). Most of the code is the early-return condition. +- The JIT will fold the `needsCleanup == false` check away to almost nothing once it sees a type-stable call site. +- Correctness is easier to reason about than the current "always-tracked" design, because the gate explicitly matches the semantic condition that requires tracking. + +## Files touched in this experiment + +``` +src/main/java/org/perlonjava/runtime/runtimetypes/MortalList.java (+CLASSIC flag, active init) +src/main/java/org/perlonjava/runtime/runtimetypes/MyVarCleanupStack.java (register/unregister early-return) +src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java (setLargeRefCounted + scopeExitCleanup early-return) +src/main/java/org/perlonjava/backend/jvm/EmitStatement.java (4 emission sites gated) +src/main/java/org/perlonjava/backend/jvm/EmitVariable.java (register emission gated) +``` + +## Next step + +Either: +1. **Commit the CLASSIC gate** as a measurement tool on `perf/perl-parity-phase1` (doesn't ship to users; helps future perf work A/B the full-feature cost). +2. **Move directly to Phase R** (per-object `needsCleanup` bit) based on this evidence, using the CLASSIC gate sites as the map of what needs per-object gating. +3. **Revert** the CLASSIC gate and keep this document as the finding. diff --git a/dev/design/life_bitpacked_jfr_profile.md b/dev/design/life_bitpacked_jfr_profile.md new file mode 100644 index 000000000..11a9935d4 --- /dev/null +++ b/dev/design/life_bitpacked_jfr_profile.md @@ -0,0 +1,75 @@ +# life_bitpacked JFR profile — what's actually hot + +**Captured:** 2026-04-18 on `perf/perl-parity-phase1` @ 3c2ca4b6a +**Workload:** `./jperl examples/life_bitpacked.pl -r none -g 20000` (17.2s wall, 14.85 Mcells/s) +**Profile:** 60s JFR with `settings=profile`, ~477 ExecutionSample events, file `dev/bench/results/jfr/life_bp_long.jfr` + +## Why this profile exists + +Phase 1 of `perl_parity_plan.md` was rejected on its upper-bound measurement (~0.7% vs 2% gate). The conclusion was that our a-priori cost model (which assumed INVOKESTATIC dispatch of `MortalList.flush` was hot) was wrong — HotSpot had already inlined the empty-case fast path. Before committing to Phase 2's large sub-call-context refactor, we profiled first. + +Phase 2 pristine-args stub experiment also showed zero improvement (same median within noise). So **the 7 ThreadLocal sub-call stacks are NOT the bottleneck either** — the JIT handles them well. + +## Hot methods (top-of-stack self-time) + +| Method | Samples | % | +|---|---:|---:| +| `RuntimeScalar.getDefinedBoolean()` | 74 | **~15%** | +| `anon230.apply` (user sub body) | 70 | 14% | +| `java.util.Arrays.copyOf` (ArrayList growth) | 70 | 14% | +| `RuntimeScalarType.blessedId(RuntimeScalar)` | 55 | 11% | +| `RuntimeScalar.set(RuntimeScalar)` | 38 | 8% | +| `RuntimeList.setFromList(RuntimeList)` | 27 | 5% | +| `RuntimeScalarCache.getScalarInt(int)` | 20 | 4% | +| `RuntimeControlFlowRegistry.checkLoopAndGetAction(String)` | 12 | 2% | +| `RuntimeScalar.scopeExitCleanup` | 6 | 1% | +| `MortalList.flush` | 5 | 1% | + +The user's bitwise ops (`bitwiseAnd`/`Xor`/`Or`/`shiftLeft`/`shiftRight`) together amount to ~**30 samples = 6%** — tiny compared to the dispatch/allocation overhead. + +## Key insights + +### 1. `getDefinedBoolean()` is the #1 self-time hit + +15% of CPU is spent deciding whether a scalar is defined. This is hit heavily by things like `if ($x)` boolean truth tests, `defined($x)` guards, and `||` / `//` expressions. Any simplification (e.g., marking cached common scalars as "always defined" and short-circuiting) would pay out immediately. + +### 2. ArrayList growth is the #2 self-time hit + +14% of CPU is spent in `Arrays.copyOf` for ArrayList growth. Stack traces show the callers are: +- `RuntimeList.add(RuntimeBase)` — return value list building in `RuntimeCode.apply` +- `RuntimeList.add(RuntimeScalar)` — user sub assembling its return list + +**This means every sub call allocates a small ArrayList that immediately grows.** Presizing or pooling could save ~14%. + +### 3. `blessedId` is 11% + +`RuntimeScalarType.blessedId(RuntimeScalar)` is hit 55 times. This is the per-method-call class dispatch path. On life_bitpacked there are no blessed objects in the hot path, so this is checking whether a scalar is blessed on every op that might use overloading. A fast-path for "not blessed" could matter. + +### 4. `MortalList.flush` is irrelevant (1%) + +Confirms Phase 1's rejection — `flush` is barely on the profile. + +### 5. ThreadLocal overhead is invisible + +No `ThreadLocal.get()` or `ArrayDeque.push/pop` in the hot list. JIT already inlines these. **Phase 2 of the original plan (consolidate 7 TL stacks) would not help life_bitpacked.** + +## Revised candidate phases + +| Phase | Hypothesis | Upper bound estimate | First test | +|---|---|---|---| +| 2' | Presize `RuntimeList` backing ArrayList to avoid grow-from-10 | **5-10%** (14% ceiling) | Change `RuntimeList`'s initial `new ArrayList<>()` to `new ArrayList<>(8)` or similar; A/B | +| 3' | Fast-path `getDefinedBoolean` for `RuntimeScalarReadOnly` / integer types | **3-5%** (15% ceiling) | Add explicit override on cached scalar types; A/B | +| 4' | Fast-path `blessedId` for non-blessed scalars | **2-4%** (11% ceiling) | Inline `blessed == null` check; A/B | + +Any single one of these has a higher upper bound than Phase 1 or original Phase 2 ever could. They should each be derisked with a minimal patch + measurement before committing to implementation, same gating as Phase 1. + +## What to do next + +1. **Retire original Phase 2** (TL consolidation) in `perl_parity_plan.md`. +2. **Adopt Phase 2'** (RuntimeList presize) as the new Phase 2 candidate. +3. **Measurement-first rule still applies** — start every phase with a minimal hack + 5-run median; if it doesn't move the needle by 2%+, reject. + +## Files + +- `dev/bench/results/jfr/life_bp_long.jfr` — raw JFR, reproducible with `JPERL_OPTS="-XX:+FlightRecorder -XX:StartFlightRecording=duration=60s,filename=...,settings=profile" ./jperl examples/life_bitpacked.pl -r none -g 20000` +- `/tmp/jfr_exec.txt` — textual dump of `jdk.ExecutionSample` events (not committed; regenerate with `jfr print --events jdk.ExecutionSample ...jfr`) diff --git a/dev/design/perl_parity_plan.md b/dev/design/perl_parity_plan.md new file mode 100644 index 000000000..954aee71e --- /dev/null +++ b/dev/design/perl_parity_plan.md @@ -0,0 +1,438 @@ +# Perl-parity plan — recovering the master-to-perl gap + +**Status:** Proposal + Phase 1 execution +**Origin:** `dev/design/perl5_internals_comparison.md` identified 5 structural hot-path differences between PerlOnJava (master and below) and system perl. This doc turns that analysis into concrete phases with per-phase measurement gates. + +## Scope + +Close the **master-to-perl** gap — the ~1.52× speed ratio master has vs system perl on `life_bitpacked` (and the similar ratio on other sub-call-heavy benches). This is about PerlOnJava's fundamental per-sub-call overhead; it is **separate** from the walker-hardening / refcount-alignment overhead vs master (that's tracked in `life_bitpacked_regression_analysis.md` and §0 of `next_steps.md`). + +## Overall measurement protocol + +Every phase MUST produce evidence from the **same measurement harness** so phases can be compared: + +1. **Correctness gates (hard):** + - `make` — all unit tests pass except pre-existing `destroy_eval_die.t#4` + - `src/test/resources/unit/refcount/destroy_eval_die.t` — same pass count as baseline + - DBIx::Class `t/storage/txn_scope_guard.t` — 18/18 + - `src/test/resources/unit/tie_scalar.t` — 12/12 + - `src/test/resources/unit/refcount/*.t` — same pass count as baseline + - **Any correctness regression blocks the phase.** + +2. **Perf gate (per phase):** + - A/B within a single process: 5 runs with the feature enabled, 5 runs with a `JPERL_NO_PHASE_N=1` env var disabling it (each phase defines its disable switch). + - `life_bitpacked` with `-r none -g 500` — median Mcells/s compared. + - Full `COMPARE=perl BENCH_RUNS=3 dev/bench/run_baseline.sh` snapshot, `baseline-.md` captured in `dev/bench/results/`. + - **Required:** median life_bitpacked improvement ≥ 2% AND no benchmark regresses > 3% compared to pre-phase baseline. + - **If neither condition holds, revert.** Phase stays as "attempted, didn't pan out" in this doc. + +3. **Comparison anchors (always measured):** + - System perl + - `feature/refcount-perf-combined` HEAD before this phase started + - Current tip (after this phase) + - Master (once per full measurement pass, as the long-term ceiling reference) + +## Phase summary & dependencies + +Phases are numbered by the order in which they should ship: + +| # | Change | Expected gain | Effort | Depends on | +|---|---|---:|---|---| +| 1 | ~~FREETMPS-style compare gating `MortalList.flush`~~ — **REJECTED** 2026-04-18, upper-bound ~0.7% | n/a | low | — | +| 2 | ~~Consolidate 7 TL stacks → one `PerlContext` struct~~ — **REJECTED** 2026-04-18, pristine-stub upper bound 0% | n/a | medium | — | +| 2' | ~~Presize `RuntimeList` backing ArrayList~~ — **SUPERSEDED** by Phase R (see below) | n/a | low-medium | — | +| 3' | ~~Fast-path `getDefinedBoolean`~~ — **SUPERSEDED** by Phase R | n/a | low | — | +| 4' | ~~Fast-path `blessedId`~~ — **SUPERSEDED** by Phase R | n/a | low | — | +| **R** | **Per-object `needsCleanup` gate across all branch machinery** (the real fix) | **~67% (life_bitpacked), 14% (lexical-only)** | high | — | +| 5 | Final cleanup & doc sync | — | low | R | + +**2026-04-18 update:** The `JPERL_CLASSIC=1` experiment (see `dev/design/classic_experiment_finding.md`) confirmed the cumulative-tax hypothesis. Disabling the branch's added machinery globally recovers 1.67× on life_bitpacked (essentially reaching pre-merge master) and 1.14× on a lexical-only bench. The master→branch gap is not one hotspot; it is ~a dozen small taxes that cannot be fixed piecewise. + +The correct structural fix (Phase R) is: add a single `needsCleanup` bit to `RuntimeBase`, set only for objects that actually need DESTROY/weaken/walker tracking, and gate every added fast-path site on that bit. The CLASSIC experiment has already mapped out exactly which sites need the gate. + +Phase 2'/3'/4' (the hotspot-driven candidates from `life_bitpacked_jfr_profile.md`) are superseded — those hotspots (`getDefinedBoolean`, `ArrayList.grow`, `blessedId`) are amplified by the same machinery and will get quieter automatically once Phase R is in. + +Why this order: + +- **Phase 1 is standalone** — no dependencies on the other phases, minimal risk, quick measurement. Serves as a sanity check that our measurement harness is sensitive enough to detect the expected-magnitude gains. +- **Phase 2 is the keystone.** Several of the later phases become cheaper once all the caller-context state lives in one struct behind one ThreadLocal (inline refcount helpers need this; array-backed stack needs this). +- **Phase 3 reuses Phase 2's struct** — the tiny inlinable refcount helpers live in or adjacent to `PerlContext`. +- **Phase 4 is the big structural lift** — do last when the surrounding state is simplified. +- **Phase 5** is the documentation sync + any cleanup of tombstone branches / temporary opt-out env vars. + +Abort early if any phase fails its perf gate. We don't pile up speculative changes. + +### Lessons from Phase 1 (apply to Phases 2-4) + +Phase 1 was rejected on its upper-bound measurement (~0.7% vs 2% gate) — the cost model ("INVOKESTATIC dispatch is hot") was wrong because HotSpot inlined the empty-case fast path inside `flush()`. Conclusion: + +**Every remaining phase MUST derisk with a profiler sample BEFORE implementation.** The per-phase workflow is now: + +1. **Upper-bound experiment first.** Patch the minimum hack that would represent the phase's theoretical best case (even if broken/unsafe) and measure life_bitpacked + bench suite. If the upper bound is < 1.5× the required gate, reject the phase without implementation. +2. **If upper-bound passes:** implement cleanly, run correctness gates, measure again. +3. **If upper-bound fails:** document in this doc and move to next phase. + +This saves ~days per rejected phase vs. a full implementation + revert cycle. + +--- + +## Phase 1 — FREETMPS-style compare gating the flush + +**Status: INVESTIGATED — REJECTED (2026-04-18)** + +### Result + +Upper-bound experiment on `perf/perl-parity-phase1` @ 3c2ca4b6a: patched `EmitStatement.java` to emit **zero** `INVOKESTATIC MortalList.flush` calls at scope exit (gated by `JPERL_DISABLE_FLUSH_EMIT=1`). This simulates the absolute best case the Phase 1 guard could achieve — a theoretical zero-cost flush skip. + +`life_bitpacked -r none -g 500`, 5 runs each, median Mcells/s: + +| Variant | Runs | Median | +|---|---|---| +| Baseline (flush emitted) | 8.93 / 8.77 / 8.80 / 8.81 / 8.78 | **8.80** | +| Upper bound (no flush emitted) | 8.95 / 8.86 / 8.90 / 8.86 / 8.76 | **8.86** | + +Improvement: ~0.7%. Well below the ≥2% Phase 1 gate. Within noise on a single bench. + +### Why this was wrong + +`MortalList.flush()`'s first instruction is `if (!active || pending.isEmpty() || flushing) return;`. HotSpot C2 inlines static call targets ≤ 35 bytes after ~10k invocations, so the "empty case" path effectively becomes three GETSTATIC-IFEQ-like checks. There is no meaningful INVOKESTATIC dispatch cost to cut once inlining takes over. + +The real cost driver on life_bitpacked is **somewhere else** — most likely the sub-call context (Phase 2) and/or refcount ops (Phase 3). + +### Decision + +Phase 1 is closed out. No code change shipped. Moving to Phase 2. + +--- + +### Goal (original, kept for archival) + +Make the common "no mortals to free" case at scope exit effectively free. + +### Background + +System perl: +```c +#define FREETMPS if (PL_tmps_ix > PL_tmps_floor) free_tmps() +``` + +One compare, zero overhead when the tmp stack is empty. + +PerlOnJava today emits at scope exit: +``` +INVOKESTATIC MortalList.flush ()V +``` + +…unconditionally. `MortalList.flush()` itself checks for an empty stack as its first action, but the INVOKESTATIC dispatch cost (~5 ns) is paid regardless. Over millions of sub calls in a tight loop, measurable. + +### Design + +Add two `public static` int fields (or thin accessors) exposing `MortalList.tmpsIx` and `MortalList.tmpsFloor`. Emit: + +``` +GETSTATIC MortalList.tmpsIx I +GETSTATIC MortalList.tmpsFloor I +IF_ICMPLE skip_flush +INVOKESTATIC MortalList.flush ()V +skip_flush: +``` + +~5 bytes of bytecode replaces 3 for the call, but saves the call dispatch when the stack is empty. + +`MortalList.flush()` stays unchanged — we're bypassing its INVOKESTATIC in the common case, not changing its semantics. + +### Risks + +- Reading the int fields is a `GETSTATIC`, which is very cheap. No correctness concern. +- If the indices are not public yet, we either expose them or add cheap static accessor helpers that the JIT can inline (a `public static int tmpsAboveFloor() { return tmpsIx - tmpsFloor; }` would be cleanest). +- Concurrent modification: `MortalList` is a ThreadLocal, so the fields are per-thread. No visibility issues. + +### Opt-out + +Env var `JPERL_NO_PHASE1=1` set at `EmitterMethodCreator` load-time forces the unconditional `INVOKESTATIC MortalList.flush` emission. Lets us A/B the exact same binary. + +### Correctness gates + +- Full `make` run green (except destroy_eval_die.t#4 pre-existing) +- destroy_eval_die.t pass count unchanged +- DBIC txn_scope_guard.t 18/18 +- tie_scalar.t 12/12 +- Quick DESTROY smoke: `./jperl -e 'package T; sub new { bless {}, shift } sub DESTROY { $::d++ } package main; { my $x = T->new; } print $::d'` should print `1` + +### Measurement gate + +- life_bitpacked: 5 runs each branch, median improvement ≥ 2% +- `refcount_bless` / `refcount_anon`: no regression > 3% +- Full bench suite snapshot committed under `dev/bench/results/` + +### Abort condition + +If the median is < 2% improvement OR any correctness gate fails OR any non-life_bitpacked benchmark regresses > 3% compared to the pre-phase baseline, **revert the phase**. Document the finding in this doc under "Phase 1 results". + +--- + +## Phase 2 — Consolidate ThreadLocal stacks into `PerlContext` + +### Goal + +Reduce per-sub-call ThreadLocal traffic from 7 separate `TL.get()` lookups to 1. Also eliminate the `HashMap` copy in `WarningBitsRegistry.pushCallerHintHash()` for the common empty-hint-hash case. + +### Background + +System perl pushes ONE `PERL_CONTEXT` struct per sub call. All caller metadata (CV, retop, savearray, old pad, warning bits, hints, etc.) is in that one struct. `cxstack` is a flat array of these structs; pushing is `cxstack[cxstack_ix++] = ...`. + +PerlOnJava today has 7 separate ThreadLocal stacks: + +1. `RuntimeCode.argsStack` +2. `RuntimeCode.pristineArgsStack` (our branch) +3. `RuntimeCode.hasArgsStack` +4. `WarningBitsRegistry.currentBitsStack` +5. `WarningBitsRegistry.callerBitsStack` +6. `WarningBitsRegistry.callerHintsStack` +7. `HintHashRegistry.callerSnapshotIdStack` + +Each push is: `ThreadLocal.get()` + `Deque.push(value)`. Seven times per sub call. + +JFR confirms the cost: 4 extra `RuntimeList` + 4 extra `ArrayList` allocations per life_bitpacked generation vs master (ArrayDeque's internal bookkeeping spills into these allocations). + +### Design + +Introduce `class PerlContext` in `org.perlonjava.runtime.runtimetypes`. Fields: + +```java +public final class PerlContext { + // args stacks + public RuntimeArray[] argsStack; int argsIx; + public List[] pristineArgsStack; int pristineIx; + public boolean[] hasArgsStack; int hasArgsIx; + + // caller context (one array of frame records) + public CallerFrame[] callerFrames; int callerFramesIx; + + // mortal / savestack state + public int tmpsIx, tmpsFloor; + // ... +} + +public static final ThreadLocal CTX = + ThreadLocal.withInitial(PerlContext::new); +``` + +One `TL.get()` at sub entry, one at sub exit. All the pushes are field + array operations. + +`CallerFrame` combines bits, hints, hintHashId into a single record. + +Existing APIs (`getCallerBitsAtFrame`, `getCallerHintsAtFrame`, `getHasArgsAt`, `getPristineArgsAt`) read from the consolidated frames array. + +The separate Registries stay as pure facades (their existing static methods delegate to the consolidated struct) so external callers don't break. + +### Risks + +- Touches many read sites. Needs thorough testing. +- Multi-phase migration: first add `PerlContext` alongside the existing stacks, make the registries read from both (prefer PerlContext), then remove the old stacks. +- Interpreter backend (`BytecodeInterpreter`) may have direct references to some of these stacks; must be updated. + +### Opt-out + +`JPERL_NO_PHASE2=1` at class-load time uses the old stacks. Adds a runtime branch on the flag in each push/pop, so we can A/B. + +### Correctness gates + +Same as Phase 1, plus: +- Run full DBIC test suite (`jcpan -t DBIx::Class`) — expect same pass count as pre-phase +- Run TT, Moo +- `make test-bundled-modules` + +### Measurement gate + +- life_bitpacked: 5 runs each, median improvement ≥ 4% over Phase 1 baseline +- No bench regresses > 3% +- Allocation profile: `RuntimeList` / `ArrayList` allocation rate cut by ≥ 50% + +### Abort condition + +If gains are < 4% OR allocation rate doesn't drop, the consolidation is not paying for its complexity — revert to just keeping the `PerlContext` as a stub for Phase 3's benefit. + +--- + +## Phase 3 — Inline refcount helpers + +### Goal + +Make `refcnt_inc` / `refcnt_dec_or_free` tiny static methods (< 20 bytes) that the JIT always inlines, moving `ScalarRefRegistry.registerRef` / `MortalList.deferDecrement` to the cold path. + +### Background + +Perl's SvREFCNT_inc is `++sv->refcnt` (1 store). SvREFCNT_dec is 4 instructions in the hot path with `sv_free2` on the cold path only. + +PerlOnJava's equivalent goes through `setLarge()` / `scopeExitCleanup` — methods that are 100-500 bytes and fail to inline under `-XX:+PrintInlining`. + +### Design + +Add to `RuntimeBase` (or a new `Refcnt` class): + +```java +public static void refcntInc(RuntimeBase base) { + if (base != null && base.refCount >= 0) { + base.refCount++; + } +} + +public static void refcntDecOrFree(RuntimeBase base) { + if (base != null && base.refCount > 1) { + base.refCount--; + } else if (base != null) { + base.refCount--; + refcntFreeColdPath(base); // separate method, out of line + } +} +``` + +Each helper body is < 20 bytes. JIT will inline eagerly at hot call sites. + +Migrate call sites in the emitter: instead of emitting `INVOKESTATIC scopeExitCleanup`, emit `INVOKESTATIC refcntDecOrFree`. `scopeExitCleanup` stays for complex cases (IO owner, capture count). + +### Risks + +- Correctness: any case where the "hot path" needs to do more than decrement (IO owner unregister, weakref clearing, MortalList pending entry) must still route through the cold path. +- The `ScalarRefRegistry.registerRef` we currently do at assignment time may conflict — need to understand when it's truly needed. + +### Opt-out + +`JPERL_NO_PHASE3=1` — emit the old INVOKESTATICs. + +### Correctness gates + +Same as Phase 2, plus: +- Specific DESTROY-correctness tests: `unit/refcount/*.t` all pass +- DBIC's 52leaks test still passes + +### Measurement gate + +- `benchmark_refcount_bless` / `benchmark_refcount_anon`: median improvement ≥ 5% over Phase 2 baseline +- life_bitpacked: no regression (this phase isn't expected to help pure numeric loops) +- JIT inlining trace: `refcntDecOrFree (N bytes)` shows `inline (hot)` at hot call sites + +### Abort condition + +Correctness: any regression is an immediate revert (refcount bugs are silent and bad). +Perf: if benchmark_refcount_* doesn't improve ≥ 3%, the win isn't worth the complexity. + +--- + +## Phase 4 — Array-backed value stack + +### Goal + +Match Perl's `PL_stack_sp` / `PL_stack_base` model. Per-thread `Object[] stack` + `int sp` replacing the current `ArrayDeque` for args and similar per-call value passing. + +### Background + +Perl's value stack is a flat `SV**` array. `PUSHs(sv)` is `*PL_stack_sp++ = sv`. `PL_stack_sp` is kept in a register across pp function bodies. Push/pop is ~1 cycle. + +PerlOnJava uses `ArrayDeque`, which: +- Boxes primitives (e.g. `hasArgsStack` push `Boolean.FALSE`) +- Requires virtual dispatch on `push`/`pop` +- Does internal resizing + +### Design + +In `PerlContext` (from Phase 2), replace `ArrayDeque` fields with: + +```java +public RuntimeArray[] argsStack = new RuntimeArray[256]; +public int argsIx; + +public void pushArgs(RuntimeArray a) { + if (argsIx == argsStack.length) argsStack = grow(argsStack); + argsStack[argsIx++] = a; +} +``` + +Same for `pristineArgsStack`, `callerFrames`. + +Critical: the grow check must be in a hot-inlineable function. A `UNLIKELY(argsIx == argsStack.length)` branch is what Perl does via the `markstack_grow()` out-of-line call. + +### Risks + +- `pushArgs`/`popArgs` have to handle both JVM and interpreter backends consistently. +- `caller()` iterates the stack by index; indexed access is actually easier than before. +- **Biggest risk:** subtle ordering bugs when arrays resize. Write a thorough stress test with deeply nested subs. + +### Opt-out + +Phase 2's `PerlContext` supports both the ArrayDeque and array-backed versions behind a feature flag. Env var `JPERL_NO_PHASE4=1` selects ArrayDeque. + +### Correctness gates + +Same as Phase 3, plus: +- Stress test: 1000+ deeply nested sub calls, verify no corruption. +- Run with `-Xss128k` to ensure the stack growth logic works correctly. + +### Measurement gate + +- life_bitpacked: median improvement ≥ 5% over Phase 3 baseline +- benchmark_method: median improvement ≥ 5% +- Bench ratios overall trending toward 1.0× perl + +### Abort condition + +As phases before: any correctness failure or insufficient perf gain triggers revert. + +--- + +## Phase 5 — Cleanup & documentation + +### Goal + +Once Phases 1-4 are stable and green, remove the opt-out env vars (they've served their purpose) and update `dev/design/next_steps.md` to reflect reality. + +### Activities + +- Remove `JPERL_NO_PHASE1`..`JPERL_NO_PHASE4` env var branches. +- Clean up any tombstone branches (`perf/perl-parity-phase-*`). +- Update `dev/design/next_steps.md` §0 tables with final numbers. +- Close-out PR #526's §0 if the numbers justify. + +--- + +## Cumulative expected impact + +Assuming each phase delivers its expected median: + +| Phase | life_bitpacked Mcells/s | vs perl | +|---|---:|---:| +| Start (PR #526 + PR #533) | 8.5 | 2.49× slower | +| After Phase 1 (+3%) | 8.8 | 2.41× slower | +| After Phase 2 (+7%) | 9.4 | 2.25× slower | +| After Phase 3 (+5% on method-heavy; pure-numeric ~unchanged) | 9.5 | 2.23× slower | +| After Phase 4 (+10%) | 10.4 | 2.04× slower | +| perl reference | 21.2 | 1.00× | + +This gets us to roughly **2× perl** on life_bitpacked — significant progress but still not parity. Closing the last 2× is beyond the scope of this plan; it's the RuntimeScalar-boxing + Java-dispatch tax that would require value types / escape-analysis improvements beyond what the current JVM can offer. + +For `benchmark_refcount_bless` (currently 6.6× perl), the expected trajectory is more favorable: + +| Phase | benchmark_refcount_bless ratio | +|---|---:| +| Start | 6.6× perl | +| After Phase 1 | 6.4× | +| After Phase 2 | 5.8× | +| After Phase 3 | ~4.5× | +| After Phase 4 | ~3.8× | + +Still more than 2× perl, reflecting that DESTROY/bless semantics need runtime machinery that C-perl embeds directly in SV. + +## Progress tracking + +### Phase 1 — in progress + +[TO BE FILLED IN DURING EXECUTION] + +### Phase 2 — pending + +### Phase 3 — pending + +### Phase 4 — pending + +### Phase 5 — pending diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitStatement.java b/src/main/java/org/perlonjava/backend/jvm/EmitStatement.java index 4c2395e2d..fc092858c 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitStatement.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitStatement.java @@ -133,32 +133,36 @@ static void emitScopeExitNullStores(EmitterContext ctx, int scopeIndex, boolean // Phase 1: Run scopeExitCleanup for scalar variables. // This defers refCount decrements for blessed references with DESTROY, // and handles IO fd recycling for anonymous filehandle globs. - for (int idx : scalarIndices) { - ctx.mv.visitVarInsn(Opcodes.ALOAD, idx); - ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, - "org/perlonjava/runtime/runtimetypes/RuntimeScalar", - "scopeExitCleanup", - "(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)V", - false); + if (!MortalList.CLASSIC) { + for (int idx : scalarIndices) { + ctx.mv.visitVarInsn(Opcodes.ALOAD, idx); + ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, + "org/perlonjava/runtime/runtimetypes/RuntimeScalar", + "scopeExitCleanup", + "(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)V", + false); + } } // Phase 1b: Walk hash/array variables for nested blessed references. // When a hash/array goes out of scope, any blessed refs stored inside // (or nested inside sub-containers) need their refCounts decremented. - for (int idx : hashIndices) { - ctx.mv.visitVarInsn(Opcodes.ALOAD, idx); - ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, - "org/perlonjava/runtime/runtimetypes/MortalList", - "scopeExitCleanupHash", - "(Lorg/perlonjava/runtime/runtimetypes/RuntimeHash;)V", - false); - } - for (int idx : arrayIndices) { - ctx.mv.visitVarInsn(Opcodes.ALOAD, idx); - ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, - "org/perlonjava/runtime/runtimetypes/MortalList", - "scopeExitCleanupArray", - "(Lorg/perlonjava/runtime/runtimetypes/RuntimeArray;)V", - false); + if (!MortalList.CLASSIC) { + for (int idx : hashIndices) { + ctx.mv.visitVarInsn(Opcodes.ALOAD, idx); + ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, + "org/perlonjava/runtime/runtimetypes/MortalList", + "scopeExitCleanupHash", + "(Lorg/perlonjava/runtime/runtimetypes/RuntimeHash;)V", + false); + } + for (int idx : arrayIndices) { + ctx.mv.visitVarInsn(Opcodes.ALOAD, idx); + ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, + "org/perlonjava/runtime/runtimetypes/MortalList", + "scopeExitCleanupArray", + "(Lorg/perlonjava/runtime/runtimetypes/RuntimeArray;)V", + false); + } } // Phase 2: Null all my variable slots to help GC collect associated objects. // For anonymous filehandle globs, this makes them unreachable so the @@ -178,7 +182,7 @@ static void emitScopeExitNullStores(EmitterContext ctx, int scopeIndex, boolean // sub never uses bless/weaken/user-sub-calls/etc.), the stack is // guaranteed empty for this sub's lexicals, so the unregister // loop is dead code. Skipping it is the win this fast path buys. - if (!skipMyVarCleanup) { + if (!skipMyVarCleanup && !MortalList.CLASSIC) { for (int idx : allIndices) { ctx.mv.visitVarInsn(Opcodes.ALOAD, idx); ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, @@ -209,7 +213,7 @@ static void emitScopeExitNullStores(EmitterContext ctx, int scopeIndex, boolean // boundaries), not entries that need to be preserved. // Flush when requested (non-sub, non-do blocks) even without my-variables, // because pending entries may exist from inner sub scope exits. - if (flush) { + if (flush && !MortalList.CLASSIC) { ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, "org/perlonjava/runtime/runtimetypes/MortalList", "flush", diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitVariable.java b/src/main/java/org/perlonjava/backend/jvm/EmitVariable.java index f65948b8a..e1ad5ba67 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitVariable.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitVariable.java @@ -1542,7 +1542,7 @@ static void handleMyOperator(EmitterVisitor emitterVisitor, OperatorNode node) { // if die propagates through this subroutine without eval. // State/our variables are excluded: state persists across calls, // our is global. register() is a no-op until the first bless(). - if (operator.equals("my")) { + if (operator.equals("my") && !org.perlonjava.runtime.runtimetypes.MortalList.CLASSIC) { emitterVisitor.ctx.mv.visitVarInsn(Opcodes.ALOAD, varIndex); emitterVisitor.ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, "org/perlonjava/runtime/runtimetypes/MyVarCleanupStack", diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index e8a6d2754..c33a74502 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,14 +33,14 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "078e0b3d7"; + public static final String gitCommitId = "3c2ca4b6a"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitDate = "2026-04-21"; + public static final String gitCommitDate = "2026-04-22"; /** * Build timestamp in Perl 5 "Compiled at" format (e.g., "Apr 7 2026 11:20:00"). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 21 2026 23:11:19"; + public static final String buildTimestamp = "Apr 22 2026 11:42:30"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/MortalList.java b/src/main/java/org/perlonjava/runtime/runtimetypes/MortalList.java index 91ee9fde8..8b08173ad 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/MortalList.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/MortalList.java @@ -26,6 +26,22 @@ public class MortalList { // as a trivially-predicted branch; the JIT will elide them. public static boolean active = true; + /** + * Experiment #3 — cumulative-tax hypothesis. + * When {@code JPERL_CLASSIC=1}, collapse the branch's added refcount/ + * walker/weaken/DESTROY machinery to a no-op where possible, so the + * runtime behaves as close to pre-merge master as we can get with a + * single static flag. Used to measure whether the full master→branch + * regression is recoverable at all by disabling the machinery. + * Breaks DESTROY + weaken + walker semantics; only safe for benchmarks. + */ + public static final boolean CLASSIC = + System.getenv("JPERL_CLASSIC") != null; + + static { + if (CLASSIC) active = false; + } + // List of RuntimeBase references awaiting decrement. // Populated by delete() when removing tracked elements. // Drained at statement boundaries (FREETMPS equivalent). diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/MyVarCleanupStack.java b/src/main/java/org/perlonjava/runtime/runtimetypes/MyVarCleanupStack.java index f1bffaf71..3f5117649 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/MyVarCleanupStack.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/MyVarCleanupStack.java @@ -72,6 +72,7 @@ public static int pushMark() { * @param var the RuntimeScalar, RuntimeHash, or RuntimeArray object */ public static void register(Object var) { + if (MortalList.CLASSIC) return; stack.add(var); // liveCounts is only consulted by ReachabilityWalker.sweepWeakRefs, // which runs only when WeakRefRegistry.weakRefsExist is true. For @@ -100,7 +101,7 @@ public static void register(Object var) { * @param var the RuntimeScalar/Array/Hash previously registered */ public static void unregister(Object var) { - if (var == null) return; + if (var == null || MortalList.CLASSIC) return; // Block-scoped my-vars pop in reverse declaration order, so // scan from the top of the stack for a fast amortized match. for (int i = stack.size() - 1; i >= 0; i--) { diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java index 84344bb86..a00ecdc7a 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java @@ -942,6 +942,13 @@ private RuntimeScalar setLarge(RuntimeScalar value) { * Separated to keep setLarge() small enough for JIT inlining of set(). */ private RuntimeScalar setLargeRefCounted(RuntimeScalar value) { + // Experiment #3: master-like classic path. Skip all refcount / + // WeakRefRegistry / MortalList work, just do the assignment. + if (MortalList.CLASSIC) { + this.type = value.type; + this.value = value.value; + return this; + } // Fast path for untracked references (refCount == -1). // Most reference assignments involve untracked objects (named variables, // anonymous arrays/hashes that were never blessed). Skip all refCount @@ -2335,6 +2342,7 @@ private void closeIOOnDrop() { */ public static void scopeExitCleanup(RuntimeScalar scalar) { if (scalar == null) return; + if (MortalList.CLASSIC) return; // Fast path: skip if no special state (most common case for integer/string vars). // When all three conditions are true, the entire method body is a no-op: From 40e19e7a8c4a00f4c71185006e028c4c86a5ae53 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 22 Apr 2026 11:57:04 +0200 Subject: [PATCH 2/6] perf(phase-r): skip MyVarCleanupStack.register emission for simple subs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the existing CleanupNeededVisitor analysis (which already gates MyVarCleanupStack.unregister emission) to also gate the matching register() emission on every `my` variable declaration. When CleanupNeededVisitor proves the enclosing sub has no bless/weaken/ tie/untie/user-sub-call/eval-STRING/nested-sub/local, no tracked ref can ever land in its my-variables. The register/unregister pair is pure overhead for such "simple leaf" subs. The visitor already proved this was safe for the unregister side; this commit applies the same proof to the register side. Also: teach CleanupNeededVisitor about tie/untie — those invoke user- written TIESCALAR/TIEHASH/TIEARRAY/UNTIE methods which can do bless and must be treated as user sub calls. Previously this was a latent imprecision that didn't matter because only unregister was gated; with the register gate in place, tie_scalar.t and tie_array.t would regress without this fix. Results (median of 5 runs on life_bitpacked -g 500): baseline : 8.51 Mcells/s phase R (narrow) : 12.70 Mcells/s (1.49x) JPERL_CLASSIC=1 ref: 12.87 Mcells/s (the theoretical ceiling — see classic_experiment_finding.md) benchmark_lexical (median of 3): baseline : 314k iters/s phase R : 391k iters/s (1.25x) Correctness: all unit tests pass (same pre-existing destroy_eval_die.t#4 failure on both before and after). tie_scalar.t 12/12, tie_array.t 29/29, all 12 DESTROY/weaken tests pass with same counts. We deliberately do NOT gate Phase 1 (scopeExitCleanup per scalar), Phase 1b (scopeExitCleanupHash/Array), or Phase 3 (MortalList.flush) on cleanupNeeded — per the safety note already in EmitStatement, those must fire even for "simple" subs because @_ can bring in tracked refs from callers. Gating them was tried and broke tie_scalar.t test 11/12; the narrow gate is the safe sweet spot. See dev/design/classic_experiment_finding.md for the investigation that derived this approach. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../org/perlonjava/backend/jvm/EmitStatement.java | 9 +++++++++ .../java/org/perlonjava/backend/jvm/EmitVariable.java | 11 ++++++++++- src/main/java/org/perlonjava/core/Configuration.java | 4 ++-- .../frontend/analysis/CleanupNeededVisitor.java | 8 ++++++++ 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitStatement.java b/src/main/java/org/perlonjava/backend/jvm/EmitStatement.java index fc092858c..2e2051aed 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitStatement.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitStatement.java @@ -122,6 +122,15 @@ static void emitScopeExitNullStores(EmitterContext ctx, int scopeIndex, boolean // // JPERL_FORCE_CLEANUP=1 forces cleanupNeeded=true at the // EmitterMethodCreator level for correctness debugging. + // + // Phase R (classic_experiment_finding.md): we EXTEND the existing + // skipMyVarCleanup gate to also suppress MyVarCleanupStack.register + // emission on `my` declarations in EmitVariable. We deliberately + // leave Phase 1/1b (scopeExitCleanup, cleanupHash/Array) and Phase 3 + // (MortalList.flush) emitting unconditionally, per the safety note + // above — those fire DESTROY for refs that entered via @_ even if + // the sub's AST has no bless/weaken/user-sub-call and was marked + // cleanupNeeded=false. boolean skipMyVarCleanup = !ctx.javaClassInfo.cleanupNeeded; // Only emit flush when there are variables that need cleanup. diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitVariable.java b/src/main/java/org/perlonjava/backend/jvm/EmitVariable.java index e1ad5ba67..12bee7c15 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitVariable.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitVariable.java @@ -1542,7 +1542,16 @@ static void handleMyOperator(EmitterVisitor emitterVisitor, OperatorNode node) { // if die propagates through this subroutine without eval. // State/our variables are excluded: state persists across calls, // our is global. register() is a no-op until the first bless(). - if (operator.equals("my") && !org.perlonjava.runtime.runtimetypes.MortalList.CLASSIC) { + // + // Phase R (classic_experiment_finding.md): skip emission when + // CleanupNeededVisitor proved the enclosing sub has no + // bless/weaken/user-sub-calls — no tracked ref can ever land + // in this my-var, so register/unregister pair is dead code. + // CLASSIC is the global kill switch; cleanupNeeded is the + // per-sub correctness-safe analog. + if (operator.equals("my") + && emitterVisitor.ctx.javaClassInfo.cleanupNeeded + && !org.perlonjava.runtime.runtimetypes.MortalList.CLASSIC) { emitterVisitor.ctx.mv.visitVarInsn(Opcodes.ALOAD, varIndex); emitterVisitor.ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, "org/perlonjava/runtime/runtimetypes/MyVarCleanupStack", diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index c33a74502..21f35ec60 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "3c2ca4b6a"; + public static final String gitCommitId = "ea7c66811"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 22 2026 11:42:30"; + public static final String buildTimestamp = "Apr 22 2026 11:55:05"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/frontend/analysis/CleanupNeededVisitor.java b/src/main/java/org/perlonjava/frontend/analysis/CleanupNeededVisitor.java index 141483285..954dabfb6 100644 --- a/src/main/java/org/perlonjava/frontend/analysis/CleanupNeededVisitor.java +++ b/src/main/java/org/perlonjava/frontend/analysis/CleanupNeededVisitor.java @@ -83,6 +83,14 @@ public void visit(OperatorNode node) { mark(); return; } + // tie/untie invoke user-written TIESCALAR/TIEHASH/TIEARRAY/UNTIE + // methods which can do bless etc. — treat as user sub call. + // Phase R: without this, tie_scalar.t / tie_array.t regress when + // scopeExitCleanup emission is gated on cleanupNeeded. + if ("tie".equals(node.operator) || "untie".equals(node.operator)) { + mark(); + return; + } if (node.operand != null) node.operand.accept(this); } From de4063c4738efc2561d6fe175e27a29fdde5bf19 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 22 Apr 2026 16:06:48 +0200 Subject: [PATCH 3/6] test(harness): fix Latin-1 byte corruption in module test runner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ModuleTestExecutionTest.executeModuleTest() was reading Perl source files as strict UTF-8, which corrupted tests that embed literal Latin-1 bytes in single-quoted string literals — notably Text-CSV/t/55_combi.t with byte 0xE4 ('ä') in its @input. The corruption chain: 1. Source file byte 0xE4 is NOT valid UTF-8 (start byte needing 2 continuation bytes but followed by 'r' = 0x72). 2. Strict UTF-8 decode replaces 0xE4 with U+FFFD in the Java String. 3. Perl compiler treats non-ASCII char in string literal as needing UTF-8 encoding, emits 3 bytes EF BF BD. 4. Text::CSV's decode_utf8 fast path detects 3 valid UTF-8 bytes and decodes them back to 1 char U+FFFD, collapsing length from 22 to 20. 5. is($ret, $string) fails with `br` vs `bär`, cascading into thousands of failures across the combinatorial iteration. Fix is the same pattern FileUtils.readFileWithEncodingDetection uses when ./jperl detects an ISO-8859-1 source: - Read bytes as ISO-8859-1 (1:1 byte-to-char preservation). - Set options.isByteStringSource = true so the parser preserves non- ASCII chars as single-byte values instead of UTF-8-encoding them. After this fix: `make test-bundled-modules` now passes 176/176 (was 175/176 with 55_combi.t failing). Confirmed no-regression: - Moo: 841/841 - Template::Toolkit: 2920/2920 - life_bitpacked Phase R speedup unchanged (~13 Mc/s vs 8.5 baseline) - `make` unit tests: same pre-existing destroy_eval_die.t#4 only This is a test-harness-only change; no production code affected. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../java/org/perlonjava/core/Configuration.java | 4 ++-- .../org/perlonjava/ModuleTestExecutionTest.java | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 21f35ec60..55382d5b2 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "ea7c66811"; + public static final String gitCommitId = "40e19e7a8"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 22 2026 11:55:05"; + public static final String buildTimestamp = "Apr 22 2026 16:05:28"; // Prevent instantiation private Configuration() { diff --git a/src/test/java/org/perlonjava/ModuleTestExecutionTest.java b/src/test/java/org/perlonjava/ModuleTestExecutionTest.java index 3b6a012ad..b7b65375d 100644 --- a/src/test/java/org/perlonjava/ModuleTestExecutionTest.java +++ b/src/test/java/org/perlonjava/ModuleTestExecutionTest.java @@ -182,13 +182,27 @@ private void executeModuleTest(String filename) { Path moduleDir = resolveModuleDir(filename); System.setProperty("user.dir", moduleDir.toAbsolutePath().toString()); - String content = new String(inputStream.readAllBytes(), StandardCharsets.UTF_8); + // Read source bytes as ISO-8859-1 so each byte maps 1:1 to a char. + // Perl source files are not always UTF-8 (e.g., Text-CSV/t/55_combi.t + // embeds literal byte 0xE4 for 'ä' in a single-quoted string). + // Strict UTF-8 decoding would replace such bytes with U+FFFD, and + // the Perl compiler would then re-encode that U+FFFD back to its + // 3-byte UTF-8 representation (EF BF BD) in the compiled string. + // ISO-8859-1 passes every byte through unchanged as a char, which + // matches what ./jperl does when reading from the filesystem. + String content = new String(inputStream.readAllBytes(), StandardCharsets.ISO_8859_1); if (content.indexOf('\r') >= 0) { content = content.replace("\r\n", "\n").replace("\r", "\n"); } CompilerOptions options = new CompilerOptions(); options.code = content; + // Mark source as raw bytes so the parser preserves non-ASCII bytes + // (e.g., Latin-1 0xE4) as single-byte chars rather than re-encoding + // them as UTF-8 sequences in compiled string literals. + // Matches what FileUtils.readFileWithEncodingDetection does for + // ./jperl when it detects ISO-8859-1 source. + options.isByteStringSource = true; // Set fileName relative to the module directory (CWD) so $0, FindBin, etc. resolve correctly // e.g., "module/Net-SSLeay/t/local/05_passwd_cb.t" -> "t/local/05_passwd_cb.t" Path moduleDirRel = Paths.get("module", filename.split("[/\\\\]")[1]); From bd50873f7aae7d645f558e3a6f64356f0167fe9e Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 22 Apr 2026 23:16:41 +0200 Subject: [PATCH 4/6] fix(cpan): distroprefs skip install of Class-XSAccessor (prefer bundled shim) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PerlOnJava ships a pure-Perl re-implementation of Class::XSAccessor bundled in the jar (lib/Class/XSAccessor.pm). The upstream CPAN distribution is XS-only and fails at runtime with "Can't load loadable object for module Class::XSAccessor: no Java XS implementation available" because PerlOnJava has no XS loader. Without this distroprefs entry, `jcpan -t ` recurses on Class::XSAccessor as a transitive dependency of Moo / DBIx::Class / Class::Method::Modifiers / ..., installs the XS version into ~/.perlonjava/lib/, and shadows the bundled shim. The shadowed upstream version fires: Class::XSAccessor exists but failed to load with error: Can't load loadable object for module Class::XSAccessor: no Java XS implementation available Compilation failed in require at .../Moo/_Utils.pm line 162. on every `use Moo` / `use DBIx::Class::Row` etc., silently breaking every module that imports Class::XSAccessor at runtime. Fix: - Add Class-XSAccessor.yml distroprefs matching "^SMUELLER/Class- XSAccessor-" with all of pl/make/test/install as no-op commands. - Bundle it via CPAN/Config.pm's _bootstrap_prefs so ~/.perlonjava/ cpan/prefs/Class-XSAccessor.yml is auto-installed on first CPAN use. Reproduction before fix (on a fresh ~/.perlonjava): $ rm /Users/fglock/.perlonjava/lib/Class/XSAccessor.pm $ ./jcpan -t DBIx::Class # triggers reinstall of XS version ... thousands of "Can't load loadable object" errors ... t/cdbi/columns_as_hashes.t ... Failed 1/2 subtests t/count/grouped_pager.t ...... Failed 7/7 subtests (timeouts) ... more cascading failures ... After fix: $ use Moo; # no warnings $ grep -c "Class::XSAccessor exists but failed" /tmp/jcpan_output 0 Remaining DBIC test failures observed in the full-suite run are pre-existing timeouts (300s Test::Harness cap hit) under heavy machine contention, unrelated to this fix or Phase R — they run fast and pass in isolation. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../org/perlonjava/core/Configuration.java | 4 +-- src/main/perl/lib/CPAN/Config.pm | 28 +++++++++++++++++++ .../perl/lib/CPAN/Prefs/Class-XSAccessor.yml | 25 +++++++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 src/main/perl/lib/CPAN/Prefs/Class-XSAccessor.yml diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 55382d5b2..c182ac09f 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "40e19e7a8"; + public static final String gitCommitId = "de4063c47"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 22 2026 16:05:28"; + public static final String buildTimestamp = "Apr 22 2026 22:01:34"; // Prevent instantiation private Configuration() { diff --git a/src/main/perl/lib/CPAN/Config.pm b/src/main/perl/lib/CPAN/Config.pm index 6aa385cbb..21dd5ce64 100644 --- a/src/main/perl/lib/CPAN/Config.pm +++ b/src/main/perl/lib/CPAN/Config.pm @@ -56,6 +56,34 @@ pl: - "--pp" env: PARAMS_VALIDATE_IMPLEMENTATION: PP +YAML + 'Class-XSAccessor.yml' => <<'YAML', +--- +comment: | + PerlOnJava ships a pure-Perl re-implementation of Class::XSAccessor + bundled in the jar (lib/Class/XSAccessor.pm). The upstream CPAN + distribution is XS-only and fails at runtime with "Can't load + loadable object for module Class::XSAccessor: no Java XS + implementation available" because PerlOnJava has no XS loader. + + Without this distroprefs entry, `jcpan -t ` recurses on + Class::XSAccessor as a transitive dependency of Moo / DBIx::Class + / Class::Method::Modifiers / ..., installs the XS version into + ~/.perlonjava/lib/, and shadows the bundled shim — silently + breaking every module that imports Class::XSAccessor at runtime. + + Skip the build/test/install steps entirely; the bundled shim in + the jar is all PerlOnJava needs. +match: + distribution: "^SMUELLER/Class-XSAccessor-" +pl: + commandline: "true" +make: + commandline: "true" +test: + commandline: "true" +install: + commandline: "true" YAML ); diff --git a/src/main/perl/lib/CPAN/Prefs/Class-XSAccessor.yml b/src/main/perl/lib/CPAN/Prefs/Class-XSAccessor.yml new file mode 100644 index 000000000..9fd8f359f --- /dev/null +++ b/src/main/perl/lib/CPAN/Prefs/Class-XSAccessor.yml @@ -0,0 +1,25 @@ +--- +comment: | + PerlOnJava ships a pure-Perl re-implementation of Class::XSAccessor + bundled in the jar (src/main/perl/lib/Class/XSAccessor.pm). The + upstream CPAN distribution is XS-only and fails at runtime with + "Can't load loadable object for module Class::XSAccessor: no Java XS + implementation available" because PerlOnJava has no XS loader. + + Without this distroprefs entry, `jcpan -t SomeModule` can recurse on + Class::XSAccessor as a transitive dependency of Moo/DBIC/etc., install + the XS version into ~/.perlonjava/lib/, and shadow the bundled shim — + breaking every module that imports Class::XSAccessor at runtime. + + Skip the build/test/install steps entirely; the bundled shim is all + PerlOnJava needs. +match: + distribution: "^SMUELLER/Class-XSAccessor-" +pl: + commandline: "true" +make: + commandline: "true" +test: + commandline: "true" +install: + commandline: "true" From 4a1ad046ba6e704e365ec65fad7c1d118c4a8072 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Thu, 23 Apr 2026 09:28:35 +0200 Subject: [PATCH 5/6] fix(closure): track captureCount for named subs; extract helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause analysis of the DBIC t/storage/base.t hot-loop hang (infinite loop in ArrayList.add called from RuntimeScalar.setArrayOfAlias): The hang is triggered by Sub::Defer's deferred sub pattern, where defer_sub() stores $DEFERRED{$deferred} = $deferred_info with a weaken(). The eval-compiled deferred sub is supposed to close over $deferred_info, keeping it alive so the %DEFERRED weak entry stays defined. In PerlOnJava, the closure capture path for named subs (whether eval-compiled or regular) was skipping the captureCount bump that anonymous subs get via makeCodeObject. As a result: 1. Outer lexical $deferred_info has captureCount=0 despite being captured by the deferred sub's body. 2. When defer_sub() returns, scopeExitCleanup on $deferred_info sees captureCount=0 and takes the normal-cleanup path, decrementing the referent's refCount. 3. refCount drops to 0, triggering clearWeakRefsTo, which undefs $DEFERRED{$deferred}. 4. On first call to the deferred sub, undefer_sub() looks up $DEFERRED{$deferred}, gets undef, falls through to `return $deferred`. 5. The caller's $undeferred now equals $deferred, so `goto &$undeferred` re-enters the deferred sub itself — infinite trampoline loop. This commit addresses the named-sub side of the bug: 1. Extract the makeCodeObject capture-tracking loop into a public helper RuntimeCode.trackClosureCaptures(code, codeObject, clazz), which walks the generated class's RuntimeScalar fields and increments captureCount on each. 2. Call trackClosureCaptures from SubroutineParser after the named sub's codeObject is instantiated, matching the anonymous-sub path. 3. Refine EmitSubroutine's isPackageSub check to NOT clear visibleVariables when we're emitting a named sub inside an eval-string context. This matches Perl 5 semantics: named subs defined inside eval STRING do close over outer lexicals. This is a partial fix: named subs OUTSIDE eval-string (and anonymous subs via makeCodeObject) now correctly bump captureCount on their captured lexicals. However, named subs DEFINED INSIDE eval-string still route through a path that captures copies of outer scalars rather than the originals — the original issue reported (`./jperl /tmp/defer_long.pl` hanging on Sub::Defer with attributes option) is not yet fully resolved. Verified no regressions: - life_bitpacked: 13.0 Mc/s (Phase R speedup preserved) - txn.t (direct, isolated): 90/90 PASS - make: only pre-existing destroy_eval_die.t#4 (same as baseline) Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../backend/jvm/EmitSubroutine.java | 12 +++- .../org/perlonjava/core/Configuration.java | 4 +- .../frontend/parser/SubroutineParser.java | 9 +++ .../runtime/runtimetypes/RuntimeCode.java | 58 +++++++++++++------ 4 files changed, 60 insertions(+), 23 deletions(-) diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitSubroutine.java b/src/main/java/org/perlonjava/backend/jvm/EmitSubroutine.java index 82c0f27f2..1989cd8a1 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitSubroutine.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitSubroutine.java @@ -99,8 +99,16 @@ public static void emitSubroutine(EmitterContext ctx, SubroutineNode node) { // definition context. Only anonymous subs (my sub, state sub, or true anonymous subs) should // capture variables. This prevents issues like defining 'sub bar::foo' inside a block with // 'our sub foo' from incorrectly capturing the 'our sub' as a closure variable. - // Note: "(eval)" is a special name for eval blocks which should capture variables like anonymous subs - boolean isPackageSub = node.name != null && !node.name.equals("") && !node.name.equals("(eval)"); + // Note: "(eval)" is a special name for eval blocks which should capture variables like anonymous subs. + // + // Exception: named subs defined inside an eval-string DO need to capture outer lexicals. + // This matches Perl 5 semantics: `eval "sub outer_name { \$outer_var }"` closes over \$outer_var. + // Without this exception, Sub::Defer (which eval-compiles deferred subs that close over + // \$undeferred and \$deferred_info) loses its closure captures, leaving \$deferred_info + // unreferenced after defer_sub returns. The weakened %DEFERRED entry then immediately + // clears, and the deferred sub's `goto &\$undeferred` loops into itself forever. + boolean isPackageSub = node.name != null && !node.name.equals("") && !node.name.equals("(eval)") + && !ctx.javaClassInfo.isInEvalString; if (isPackageSub) { // Package subs should not capture any closure variables // They can only access global variables and their parameters diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index c182ac09f..88af9d866 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "de4063c47"; + public static final String gitCommitId = "bd50873f7"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 22 2026 22:01:34"; + public static final String buildTimestamp = "Apr 23 2026 09:27:22"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java index 2a4243c35..7b253f2fa 100644 --- a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java @@ -1299,6 +1299,15 @@ public static ListNode handleNamedSubWithFilter(Parser parser, String subName, S Field field = placeholder.codeObject.getClass().getDeclaredField("__SUB__"); field.set(placeholder.codeObject, codeRef); + // Track closure captures: increment captureCount on each + // captured outer lexical, so scopeExitCleanup knows this + // named sub holds a strong ref to them. Without this, weaken- + // based patterns like Sub::Defer's %DEFERRED registry clear + // immediately after defer_sub returns, causing the deferred + // sub's `goto &$undeferred` to loop into itself forever. + // The anon-sub path already does this inside makeCodeObject(). + RuntimeCode.trackClosureCaptures(placeholder, placeholder.codeObject, generatedClass); + } else if (runtimeCode instanceof InterpretedCode interpretedCode) { // InterpretedCode path - update placeholder in-place (not replace codeRef.value) // This is critical: hash assignments copy RuntimeScalar but share the same diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java index 8286a443e..087f54848 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java @@ -1630,6 +1630,44 @@ public static RuntimeScalar makeCodeObject(Object codeObject) throws Exception { return makeCodeObject(codeObject, null); } + /** + * Track closure captures: iterate all RuntimeScalar fields of the generated + * code class (except the self-referencing __SUB__), increment captureCount + * on each, and store the list on the RuntimeCode for eventual release when + * the CODE ref's refCount drops to 0. + * + *

Called from both {@link #makeCodeObject} (anonymous subs installed + * via {@code sub {...}}) and SubroutineParser (named subs). Without this + * call, named subs defined inside an eval-string leak their closure + * captures — captureCount stays at 0 on the outer lexicals, causing + * scopeExitCleanup to treat them as uncaptured and prematurely decrement + * referent refCounts. That in turn breaks weaken-based patterns like + * Sub::Defer's %DEFERRED registry, which relies on closure-kept-alive + * lexicals to keep weakened hash entries defined until the first call + * to the deferred sub. + */ + public static void trackClosureCaptures(RuntimeCode code, Object codeObject, Class clazz) throws IllegalAccessException { + Field[] allFields = clazz.getDeclaredFields(); + List captured = new ArrayList<>(); + for (Field f : allFields) { + if (f.getType() == RuntimeScalar.class && !"__SUB__".equals(f.getName())) { + RuntimeScalar capturedVar = (RuntimeScalar) f.get(codeObject); + if (capturedVar != null) { + captured.add(capturedVar); + capturedVar.captureCount++; + } + } + } + if (!captured.isEmpty()) { + code.capturedScalars = captured.toArray(new RuntimeScalar[0]); + // Enable refCount tracking for closures with captures. + // When the CODE ref's refCount drops to 0, releaseCaptures() + // fires (via DestroyDispatch.callDestroy), letting captured + // blessed objects run DESTROY. + code.refCount = 0; + } + } + public static RuntimeScalar makeCodeObject(Object codeObject, String prototype) throws Exception { return makeCodeObject(codeObject, prototype, null); } @@ -1673,25 +1711,7 @@ public static RuntimeScalar makeCodeObject(Object codeObject, String prototype, // Each instance field of type RuntimeScalar (except __SUB__) is a // captured lexical variable. We store them so that releaseCaptures() // can decrement blessed ref refCounts when the closure is discarded. - Field[] allFields = clazz.getDeclaredFields(); - List captured = new ArrayList<>(); - for (Field f : allFields) { - if (f.getType() == RuntimeScalar.class && !"__SUB__".equals(f.getName())) { - RuntimeScalar capturedVar = (RuntimeScalar) f.get(codeObject); - if (capturedVar != null) { - captured.add(capturedVar); - capturedVar.captureCount++; - } - } - } - if (!captured.isEmpty()) { - code.capturedScalars = captured.toArray(new RuntimeScalar[0]); - // Enable refCount tracking for closures with captures. - // When the CODE ref's refCount drops to 0, releaseCaptures() - // fires (via DestroyDispatch.callDestroy), letting captured - // blessed objects run DESTROY. - code.refCount = 0; - } + trackClosureCaptures(code, codeObject, clazz); RuntimeScalar codeRef = new RuntimeScalar(code); From 1c79bbc7bb773d9de073297b94f98de9e5b190e5 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Thu, 23 Apr 2026 10:29:50 +0200 Subject: [PATCH 6/6] =?UTF-8?q?fix(B):=20B::NULL=20is=20terminal=20?= =?UTF-8?q?=E2=80=94=20return=20undef=20from=20all=20accessors?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every `->next` chain walker that uses the common idiom my $op = $cv->START; while ($op) { ... last unless $op->can('next'); $op = $op->next; } was previously trapped in an infinite loop on PerlOnJava. B::NULL is the sentinel marking end-of-chain, but its `next` method returned `$_[0]` (self) — so `$op` stayed pinned on B::NULL forever: 1. `$op->can('line')` → true (inherited from B::OP) 2. `$op->can('next')` → true (inherited from B::OP) 3. `$op = $op->next` → same B::NULL instance (not undef) 4. `while ($op)` → still truthy (blessed hashref) 5. goto 1 In real Perl, B::NULL is XS-backed and every accessor returns undef — yielding `$op = undef` at step 3, which terminates the `while ($op)` loop on the next iteration. This blew up visibly in Test2-heavy code, where `Test2::Util::Sub::sub_info` walks the optree of every comparison callback. Each call allocated an unbounded `@all_lines` list, driving the JVM into GC-thrash (observed as 13 parallel GC threads × 64 s CPU with the main thread making only 25 % of one core of forward progress). Tests affected (any time `Test2::Util::Sub::sub_info` runs, i.e., any Test2 comparison with a coderef argument): * Hash::Wrap t/as_return.t — 3 m 20 s timeout → 1.4 s wallclock * DBIx::Class full test suite (many subtests use Test2 deep compare) * Anything using `Test2::Tools::Compare::like` / `is_deeply` with callback-shaped expectations (meta / object / call / prop DSL) Fix: replace B::NULL's method overrides with undef-returning stubs for every accessor a walker might call after reaching the null op — `next`, `line`, `file`, `sibling`, `first`, `last`, `targ`. This matches real Perl's XS B::NULL behavior. Verified: * Hash::Wrap t/as_return.t: 3 m 20 s (hang/timeout) → 1.4 s wallclock * DBIx::Class t/storage/txn.t: 90/90 passes, same as before * life_bitpacked: ~13 Mcells/s, Phase R speedup preserved * make: only pre-existing destroy_eval_die.t#4 fails (same as baseline) See dev/design/hash_wrap_triage_plan.md for the full triage plan — this commit is Phase 0. Phases 1-3 address the residual distributed-machinery overhead (the actual GC pressure class of problem). Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- dev/bench/hash_wrap_repro/lib/Hash/Wrap.pm | 1627 +++++++++++++++++ dev/design/hash_wrap_triage_plan.md | 211 +++ .../org/perlonjava/core/Configuration.java | 6 +- src/main/perl/lib/B.pm | 28 +- 4 files changed, 1865 insertions(+), 7 deletions(-) create mode 100644 dev/bench/hash_wrap_repro/lib/Hash/Wrap.pm create mode 100644 dev/design/hash_wrap_triage_plan.md diff --git a/dev/bench/hash_wrap_repro/lib/Hash/Wrap.pm b/dev/bench/hash_wrap_repro/lib/Hash/Wrap.pm new file mode 100644 index 000000000..d362afdee --- /dev/null +++ b/dev/bench/hash_wrap_repro/lib/Hash/Wrap.pm @@ -0,0 +1,1627 @@ +package Hash::Wrap; + +# ABSTRACT: create on-the-fly objects from hashes + +use 5.01000; + +use strict; +use warnings; + +use Scalar::Util; +use Digest::MD5; +our $VERSION = '1.09'; + +our @EXPORT = qw[ wrap_hash ]; + +our @CARP_NOT = qw( Hash::Wrap ); +our $DEBUG = 0; + +# copied from Damian Conway's PPR: PerlIdentifier +use constant PerlIdentifier => qr/\A([^\W\d]\w*+)\z/; + +# use builtin::export_lexically if available +use constant HAS_LEXICAL_SUBS => $] >= 5.038; +use if HAS_LEXICAL_SUBS, 'experimental', 'builtin'; +use if HAS_LEXICAL_SUBS, 'builtin'; + +our %REGISTRY; + +sub _croak { + require Carp; + goto \&Carp::croak; +} + +sub _croak_class_method { + my ( $class, $method ) = @_; + $class = ref( $class ) || $class; + _croak( qq[Can't locate class method "$method" via package "$class"] ); +} + +sub _croak_object_method { + my ( $object, $method ) = @_; + my $class = Scalar::Util::blessed( $object ) || ref( $object ) || $object; + _croak( qq[Can't locate object method "$method" via package "$class"] ); +} + +sub _find_symbol { + my ( $package, $symbol, $reftype ) = @_; + + no strict 'refs'; ## no critic (ProhibitNoStrict) + my $candidate = *{"$package\::$symbol"}{SCALAR}; + + return $$candidate + if defined $candidate + && 2 == grep { defined $_->[0] && defined $_->[1] ? $_->[0] eq $_->[1] : 1 } + [ $reftype->[0], Scalar::Util::reftype $candidate ], + [ $reftype->[1], Scalar::Util::reftype $$candidate ]; + + _croak( "Unable to find scalar \$$symbol in class $package" ); +} + +# this is called only if the method doesn't exist. +sub _generate_accessor { + my ( $hash_class, $class, $key ) = @_; + + my %dict = ( + key => $key, + class => $class, + ); + + my $code = $REGISTRY{$hash_class}{accessor_template}; + my $coderef = _compile_from_tpl( \$code, \%dict ); + _croak_about_code( \$code, 'accessor' ) + if $@; + + return $coderef; +} + +sub _generate_predicate { + my ( $hash_class, $class, $key ) = @_; + + my %dict = ( + key => $key, + class => $class, + ); + + my $code = $REGISTRY{$hash_class}{predicate_template}; + my $coderef = _compile_from_tpl( \$code, \%dict ); + _croak_about_code( \$code, 'predicate' ) + if $@; + + return $coderef; +} + +sub _autoload { + my ( $hash_class, $method, $object ) = @_; + + my ( $class, $key ) = $method =~ /(.*)::(.*)/; + + _croak_class_method( $object, $key ) + unless Scalar::Util::blessed( $object ); + + if ( exists $REGISTRY{$hash_class}{predicate_template} + && $key =~ /^has_(.*)/ ) + { + return _generate_predicate( $hash_class, $class, $1 ); + } + + _croak_object_method( $object, $key ) + unless $REGISTRY{$hash_class}{validate}->( $object, $key ); + + _generate_accessor( $hash_class, $class, $key ); +} + +sub _can { + my ( $self, $key, $CLASS ) = @_; + + my $class = Scalar::Util::blessed( $self ); + return () if !defined $class; + + if ( !exists $self->{$key} ) { + + if ( exists $Hash::Wrap::REGISTRY{$class}{methods}{$key} ) { + ## no critic (ProhibitNoStrict) + no strict 'refs'; + my $method = "${class}::$key"; + return *{$method}{CODE}; + } + return (); + } + + my $method = "${class}::$key"; + + ## no critic (ProhibitNoStrict PrivateSubs) + no strict 'refs'; + return *{$method}{CODE} + || Hash::Wrap::_generate_accessor( $CLASS, $class, $key ); +} + +sub import { ## no critic(ExcessComplexity) + shift; + + my @imports = @_; + push @imports, @EXPORT unless @imports; + + my @return; + + for my $args ( @imports ) { + if ( !ref $args ) { + _croak( "$args is not exported by ", __PACKAGE__ ) + unless grep { /$args/ } @EXPORT; ## no critic (BooleanGrep) + + $args = { -as => $args }; + } + + elsif ( 'HASH' ne ref $args ) { + _croak( 'argument to ', __PACKAGE__, '::import must be string or hash' ) + unless grep { /$args/ } @EXPORT; ## no critic (BooleanGrep) + } + else { + # make a copy as it gets modified later on + $args = {%$args}; + } + + _croak( 'cannot mix -base and -class' ) + if !!$args->{-base} && exists $args->{-class}; + + $DEBUG = $ENV{HASH_WRAP_DEBUG} // delete $args->{-debug}; + + # -as may be explicitly 'undef' to indicate use in a standalone class + $args->{-as} = 'wrap_hash' unless exists $args->{-as}; + my $name = delete $args->{-as}; + + my $target = delete $args->{-into} // caller; + + if ( defined $name ) { + + if ( defined( my $reftype = Scalar::Util::reftype( $name ) ) ) { + _croak( '-as must be undefined or a string or a reference to a scalar' ) + if $reftype ne 'SCALAR' + && $reftype ne 'VSTRING' + && $reftype ne 'REF' + && $reftype ne 'GLOB' + && $reftype ne 'LVALUE' + && $reftype ne 'REGEXP'; + + $args->{-as_scalar_ref} = $name; + + } + + elsif ( $name eq '-return' ) { + $args->{-as_return} = 1; + } + } + + if ( $args->{-base} ) { + _croak( q{don't use -as => -return with -base} ) + if $args->{-as_return}; + $args->{-class} = $target; + $args->{-new} = 1 if !exists $args->{-new}; + _build_class( $target, $name, $args ); + } + + else { + _build_class( $target, $name, $args ); + if ( defined $name ) { + my $sub = _build_constructor( $target, $name, $args ); + if ( $args->{-as_return} ) { + push @return, $sub; + } + elsif ( $args->{-lexical} ) { + _croak( "Perl >= v5.38 is required for -lexical; current perl is $^V" ) + unless HAS_LEXICAL_SUBS; + builtin::export_lexically( $name, $sub ); + } + } + } + + # clean out known attributes + delete @{$args}{ + qw[ -as -as_return -as_scalar_ref -base -class -clone + -copy -defined -exists -immutable -lexical -lockkeys -lvalue + -methods -new -predicate -recurse -undef ] + }; + + if ( keys %$args ) { + _croak( 'unknown options passed to ', __PACKAGE__, '::import: ', join( ', ', keys %$args ) ); + } + } + + return @return; +} + +sub _build_class { ## no critic(ExcessComplexity) + my ( $target, $name, $attr ) = @_; + + # in case we're called inside a recursion and the recurse count + # has hit zero, default behavior is no recurse, so remove it so + # the attr signature computed below isn't contaminated by a + # useless -recurse => 0 attribute. + if ( exists $attr->{-recurse} ) { + _croak( '-recurse must be a number' ) + unless Scalar::Util::looks_like_number( $attr->{-recurse} ); + delete $attr->{-recurse} if $attr->{-recurse} == 0; + } + + if ( !defined $attr->{-class} ) { + + ## no critic (ComplexMappings) + my @class = map { + ( my $key = $_ ) =~ s/-//; + ( $key, defined $attr->{$_} ? $attr->{$_} : '' ) + } sort keys %$attr; + + $attr->{-class} = join q{::}, 'Hash::Wrap::Class', Digest::MD5::md5_hex( @class ); + } + + elsif ( $attr->{-class} eq '-target' || $attr->{-class} eq '-caller' ) { + _croak( "can't set -class => '@{[ $attr->{-class} ]}' if '-as' is not a plain string" ) + if ref $name; + $attr->{-class} = $target . q{::} . $name; + } + + my $class = $attr->{-class}; + + return $class if defined $REGISTRY{$class}; + my $rentry = $REGISTRY{$class} = { methods => {} }; + + my %closures; + my @BODY; + my %dict = ( + class => $class, + signature => q{}, + body => \@BODY, + autoload_attr => q{}, + validate_inline => 'exists $self->{\<>}', + validate_method => 'exists $self->{$key}', + set => '$self->{q[\<>]} = $_[0] if @_;', + return_value => '$self->{q[\<>]}', + recursion_constructor => q{}, + predicate_template => q{}, + ); + + if ( $attr->{-lvalue} ) { + if ( $] lt '5.016000' ) { + _croak( 'lvalue accessors require Perl 5.16 or later' ) + if $attr->{-lvalue} < 0; + } + else { + $dict{autoload_attr} = q[: lvalue]; + $dict{signature} = q[: lvalue]; + } + } + + if ( $attr->{-undef} ) { + $dict{validate_method} = q[ 1 ]; + $dict{validate_inline} = q[ 1 ]; + } + + if ( $attr->{-exists} ) { + $dict{exists} = $attr->{-exists} =~ PerlIdentifier ? $1 : 'exists'; + push @BODY, q[ sub <> { exists $_[0]->{$_[1] } } ]; + $rentry->{methods}{ $dict{exists} } = undef; + } + + if ( $attr->{-defined} ) { + $dict{defined} = $attr->{-defined} =~ PerlIdentifier ? $1 : 'defined'; + push @BODY, q[ sub <> { defined $_[0]->{$_[1] } } ]; + $rentry->{methods}{ $dict{defined} } = undef; + } + + if ( $attr->{-immutable} ) { + $dict{set} = <<'END'; + Hash::Wrap::_croak( q[Modification of a read-only value attempted]) + if @_; +END + } + + if ( $attr->{-recurse} ) { + + # decrement recursion limit. It's infinite recursion if + # -recurse < 0; always set to -1 so we keep using the same + # class. Note that -recurse will never be zero upon entrance + # of this block, as -recurse => 0 is removed from the + # attributes way upstream. + + $dict{recurse_limit} = --$attr->{-recurse} < 0 ? -1 : $attr->{-recurse}; + + $dict{quoted_key} = 'q[\<>]'; + $dict{hash_value} = '$self->{<>}'; + + $dict{recurse_wrap_hash} = '$<>::recurse_into_hash->( <> )'; + + $dict{return_value} = <<'END'; + 'HASH' eq (Scalar::Util::reftype( <> ) // q{}) + && ! Scalar::Util::blessed( <> ) + ? <> + : <>; +END + if ( $attr->{-copy} ) { + + if ( $attr->{-immutable} ) { + $dict{wrap_hash_entry} = <<'END'; + do { Hash::Util::unlock_ref_value( $self, <> ); + <> = <>; + Hash::Util::lock_ref_value( $self, <> ); + <>; + } +END + } + else { + $dict{wrap_hash_entry} = '<> = <>'; + } + + } + else { + $dict{wrap_hash_entry} = '<>'; + } + + # do a two-step initialization of the constructor. If + # the initialization sub is stored in $recurse_into_hash, and then + # $recurse_into_hash is set to the actual constructor I worry that + # Perl may decide to garbage collect the setup subroutine while it's + # busy setting $recurse_into_hash. So, store the + # initialization sub in something other than $recurse_into_hash. + + $dict{recursion_constructor} = <<'END'; +our $recurse_into_hash; +our $setup_recurse_into_hash = sub { + require Hash::Wrap; + ( $recurse_into_hash ) = Hash::Wrap->import ( { %$attr, -as => '-return', + -recurse => <> } ); + goto &$recurse_into_hash; +}; +$recurse_into_hash = $setup_recurse_into_hash; +END + + my %attr = ( %$attr, -recurse => --$attr->{-recurse} < 0 ? -1 : $attr->{-recurse}, ); + delete @attr{qw( -as_scalar_ref -class -base -as )}; + $closures{'$attr'} = \%attr; + } + + if ( $attr->{-predicate} ) { + $dict{predicate_template} = <<'END'; +our $predicate_template = q[ + package \<>; + + use Scalar::Util (); + + sub has_\<> { + my $self = shift; + + Hash::Wrap::_croak_class_method( $self, 'has_\<>' ) + unless Scalar::Util::blessed( $self ); + + return exists $self->{\<>}; + } + + $Hash::Wrap::REGISTRY{methods}{'has_\<>'} = undef; + + \&has_\<>; +]; +END + } + + my $class_template = <<'END'; +package <>; + +<> + +use Scalar::Util (); + +our $validate = sub { + my ( $self, $key ) = @_; + return <>; +}; + +<> + +our $accessor_template = q[ + package \<>; + + use Scalar::Util (); + + sub \<> <> { + my $self = shift; + + Hash::Wrap::_croak_class_method( $self, '\<>' ) + unless Scalar::Util::blessed( $self ); + + Hash::Wrap::_croak_object_method( $self, '\<>' ) + unless ( <> ); + + <> + + return <>; + } + \&\<>; +]; + +<> + + +<> + +our $AUTOLOAD; +sub AUTOLOAD <> { + goto &{ Hash::Wrap::_autoload( q[<>], $AUTOLOAD, $_[0] ) }; +} + +sub DESTROY { } + +sub can { + return Hash::Wrap::_can( @_, q[<>] ); +} + +1; +END + + _compile_from_tpl( \$class_template, \%dict, keys %closures ? \%closures : () ) + or _croak_about_code( \$class_template, "class $class" ); + + if ( !!$attr->{-new} ) { + my $lname = $attr->{-new} =~ PerlIdentifier ? $1 : 'new'; + _build_constructor( $class, $lname, { %$attr, -as_method => 1 } ); + } + + if ( $attr->{-methods} ) { + + my $methods = $attr->{-methods}; + _croak( '-methods option value must be a hashref' ) + unless 'HASH' eq ref $methods; + + for my $mth ( keys %$methods ) { + _croak( "method name '$mth' is not a valid Perl identifier" ) + if $mth !~ PerlIdentifier; + + my $code = $methods->{$mth}; + _croak( qq{value for method "$mth" must be a coderef} ) + unless 'CODE' eq ref $code; + no strict 'refs'; ## no critic (ProhibitNoStrict) + *{"${class}::${mth}"} = $code; + } + + $rentry->{methods}{$_} = undef for keys %$methods; + } + + push @CARP_NOT, $class; + $rentry->{accessor_template} + = _find_symbol( $class, 'accessor_template', [ 'SCALAR', undef ] ); + + if ( $attr->{-predicate} ) { + $rentry->{predicate_template} + = _find_symbol( $class, 'predicate_template', [ 'SCALAR', undef ] ); + } + + $rentry->{validate} = _find_symbol( $class, 'validate', [ 'REF', 'CODE' ] ); + + Scalar::Util::weaken( $rentry->{validate} ); + + return $class; +} + +sub _build_constructor { ## no critic (ExcessComplexity) + my ( $package, $name, $args ) = @_; + + # closure for user provided clone sub + my %closures; + + _croak( 'cannot mix -copy and -clone' ) + if exists $args->{-copy} && exists $args->{-clone}; + + my @USE; + my %dict = ( + package => $package, + constructor_name => $name, + use => \@USE, + package_return_value => '1;', + ); + + $dict{class} + = $args->{-as_method} + ? 'shift;' + : 'q[' . $args->{-class} . '];'; + + my @copy = ( + 'Hash::Wrap::_croak(q{the argument to <>::<> must not be an object})', + ' if Scalar::Util::blessed( $hash );', + ); + + if ( $args->{-copy} ) { + push @copy, '$hash = { %{ $hash } };'; + } + + elsif ( exists $args->{-clone} ) { + + if ( 'CODE' eq ref $args->{-clone} ) { + $closures{'clone'} = $args->{-clone}; + # overwrite @copy, as the clone sub could take an object. + @copy = ( + 'state $clone = $CLOSURES->{clone};', + '$hash = $clone->($hash);', + 'Hash::Wrap::_croak(q{the custom clone routine for <> returned an object instead of a plain hash})', + ' if Scalar::Util::blessed( $hash );', + ); + } + else { + push @USE, q[use Storable ();]; + push @copy, '$hash = Storable::dclone $hash;'; + } + } + + $dict{copy} = join "\n", @copy; + + $dict{lock} = do { + my @eval; + + if ( defined( my $opts = $args->{-immutable} || undef ) ) { + + push @USE, q[use Hash::Util ();]; + + if ( 'ARRAY' eq ref $opts ) { + _croak( "-immutable: attribute name ($_) is not a valid Perl identifier" ) + for grep { $_ !~ PerlIdentifier } @{$opts}; + + push @eval, + 'Hash::Util::lock_keys_plus(%$hash, qw{ ' . join( q{ }, @{$opts} ) . ' });', + '@{$hash}{Hash::Util::hidden_keys(%$hash)} = ();', + ; + } + + push @eval, 'Hash::Util::lock_hash(%$hash)'; + } + elsif ( defined( $opts = $args->{-lockkeys} || undef ) ) { + + push @USE, q[use Hash::Util ();]; + + if ( 'ARRAY' eq ref $args->{-lockkeys} ) { + _croak( "-lockkeys: attribute name ($_) is not a valid Perl identifier" ) + for grep { $_ !~ PerlIdentifier } @{ $args->{-lockkeys} }; + + push @eval, + 'Hash::Util::lock_keys_plus(%$hash, qw{ ' . join( q{ }, @{ $args->{-lockkeys} } ) . ' });'; + } + elsif ( $args->{-lockkeys} ) { + + push @eval, 'Hash::Util::lock_keys(%$hash)'; + } + } + + join( "\n", @eval ); + + }; + + # return the constructor sub from the factory and don't insert the + # name into the package namespace + if ( $args->{-as_scalar_ref} || $args->{-as_return} || $args->{-lexical} ) { + $dict{package_return_value} = q{}; + $dict{constructor_name} = q{}; + } + + #<<< no tidy + my $code = <<'ENDCODE'; + package <>; + + <> + use Scalar::Util (); + + no warnings 'redefine'; + + sub <> (;$) { + my $class = <> + my $hash = shift // {}; + + Hash::Wrap::_croak( 'argument to <>::<> must be a hashref' ) + if 'HASH' ne Scalar::Util::reftype($hash); + <> + bless $hash, $class; + <> + } + <> + +ENDCODE + #>>> + + my $result = _compile_from_tpl( \$code, \%dict, keys %closures ? \%closures : () ) + || _croak_about_code( \$code, "constructor (as $name) subroutine" ); + + # caller asked for a coderef to be stuffed into a scalar + ${$name} = $result if $args->{-as_scalar_ref}; + return $result; +} + +sub _croak_about_code { + my ( $code, $what, $error ) = @_; + $error //= $@; + _line_number_code( $code ); + _croak( qq[error compiling $what: $error\n$$code] ); +} + +sub _line_number_code { + my ( $code ) = @_; + chomp( $$code ); + $$code .= "\n"; + my $space = length( $$code =~ tr/\n// ); + my $line = 0; + $$code =~ s/^/sprintf "%${space}d: ", ++$line/emg; +} + +sub _compile_from_tpl { + my ( $code, $dict, $closures ) = @_; + + if ( defined $closures && %$closures ) { + + # add code to create lexicals if the keys begin with a q{$} + $dict->{closures} = join( "\n", + map { "my $_ = \$CLOSURES->{'$_'};" } + grep { substr( $_, 0, 1 ) eq q{$} } + keys %$closures ); + } + + _interpolate( $code, $dict ); + + if ( $DEBUG ) { + my $lcode = $$code; + _line_number_code( \$lcode ); + print STDERR $lcode; + } + + _clean_eval( $code, exists $dict->{closures} ? $closures : () ); + +} + +# eval in a clean lexical space. +sub _clean_eval { + ## no critic (StringyEval RequireCheckingReturnValueOfEval ) + if ( @_ > 1 ) { + ## no critic (UnusedVars) + my $CLOSURES = $_[1]; + eval( ${ $_[0] } ); + } + else { + eval( ${ $_[0] } ); + } + +} + +sub _interpolate { + my ( $tpl, $dict, $work ) = @_; + $work = { loop => {} } unless defined $work; + + $$tpl =~ s{(\\)?\<\<(\w+)\>\> + }{ + if ( defined $1 ) { + "<<$2>>"; + } + else { + my $key = lc $2; + my $v = $dict->{$key}; + if ( defined $v ) { + $v = join( "\n", @$v ) + if 'ARRAY' eq ref $v; + + _croak( "circular interpolation loop detected for $key" ) + if $work->{loop}{$key}++; + _interpolate( \$v, $dict, $work ); + --$work->{loop}{$key}; + $v; + } + else { + q{}; + } + } + }gex; + return; +} + +1; + +# +# This file is part of Hash-Wrap +# +# This software is Copyright (c) 2017 by Smithsonian Astrophysical Observatory. +# +# This is free software, licensed under: +# +# The GNU General Public License, Version 3, June 2007 +# + +__END__ + +=pod + +=for :stopwords Diab Jerius Smithsonian Astrophysical Observatory getter + +=head1 NAME + +Hash::Wrap - create on-the-fly objects from hashes + +=head1 VERSION + +version 1.09 + +=head1 SYNOPSIS + + use Hash::Wrap; + + my $result = wrap_hash( { a => 1 } ); + print $result->a; # prints + print $result->b; # throws + + # import two constructors, and with different behaviors. + use Hash::Wrap + { -as => 'cloned', clone => 1}, + { -as => 'copied', copy => 1 }; + + my $cloned = cloned( { a => 1 } ); + print $cloned->a; + + my $copied = copied( { a => 1 } ); + print $copied->a; + + # don't pollute your namespace + my $wrap; + use Hash::Wrap { -as => \$wrap}; + my $obj = $wrap->( { a => 1 } ); + + # apply constructors to hashes two levels deep into the hash + use Hash::Wrap { -recurse => 2 }; + + # apply constructors to hashes at any level + use Hash::Wrap { -recurse => -1 }; + +=head1 DESCRIPTION + +B creates objects from hashes, providing accessors for +hash elements. The objects are hashes, and may be modified using the +standard Perl hash operations and the object's accessors will behave +accordingly. + +Why use this class? Sometimes a hash is created on the fly and it's too +much of a hassle to build a class to encapsulate it. + + sub foo () { ... ; return { a => 1 }; } + +With C: + + use Hash::Wrap; + + sub foo () { ... ; return wrap_hash( { a => 1 ); } + + my $obj = foo (); + print $obj->a; + +Elements can be added or removed to the object and accessors will +track them. The object may be made immutable, or may have a restricted +set of attributes. + +There are many similar modules on CPAN (see L for comparisons). + +What sets B apart is that it's possible to customize +object construction and accessor behavior: + +=over + +=item * + +It's possible to use the passed hash directly, or make shallow or deep +copies of it. + +=item * + +Accessors can be customized so that accessing a non-existent element +can throw an exception or return the undefined value. + +=item * + +On recent enough versions of Perl, accessors can be lvalues, e.g. + + $obj->existing_key = $value; + +=back + +=head1 USAGE + +=head2 Simple Usage + +C'ing B without options imports a subroutine called +B which takes a hash, blesses it into a wrapper class and +returns the hash: + + use Hash::Wrap; + + my $h = wrap_hash { a => 1 }; + print $h->a, "\n"; # prints 1 + +B<[API change @ v1.0]> +The passed hash must be a plain hash (i.e. not an object or blessed +hash). To pass an object, you must specify a custom clone subroutine +returning a plain hashref via the L option. + +The wrapper class has no constructor method, so the only way to create +an object is via the B subroutine. (See L +for more about wrapper classes) If B is called without +arguments, it will create a hash for you. + +=head2 Advanced Usage + +=head3 B is an awful name for the constructor subroutine + +So rename it: + + use Hash::Wrap { -as => "a_much_better_name_for_wrap_hash" }; + + $obj = a_much_better_name_for_wrap_hash( { a => 1 } ); + +=head3 The Wrapper Class name matters + +If the class I matters, but it'll never be instantiated +except via the imported constructor subroutine: + + use Hash::Wrap { -class => 'My::Class' }; + + my $h = wrap_hash { a => 1 }; + print $h->a, "\n"; # prints 1 + $h->isa( 'My::Class' ); # returns true + +or, if you want it to reflect the current package, try this: + + package Foo; + use Hash::Wrap { -class => '-target', -as => 'wrapit' }; + + my $h = wrapit { a => 1 }; + $h->isa( 'Foo::wrapit' ); # returns true + +Again, the wrapper class has no constructor method, so the only way to +create an object is via the generated subroutine. + +=head3 The Wrapper Class needs its own class constructor method + +To generate a wrapper class which can be instantiated via its own +constructor method: + + use Hash::Wrap { -class => 'My::Class', -new => 1 }; + +The default B constructor subroutine is still exported, so + + $h = My::Class->new( { a => 1 } ); + +and + + $h = wrap_hash( { a => 1 } ); + +do the same thing. + +To give the constructor method a different name: + + use Hash::Wrap { -class => 'My::Class', -new => '_my_new' }; + +To prevent the constructor subroutine from being imported: + + use Hash::Wrap { -as => undef, -class => 'My::Class', -new => 1 }; + +=head3 A stand alone Wrapper Class + +To create a stand alone wrapper class, + + package My::Class; + + use Hash::Wrap { -base => 1 }; + + 1; + +And later... + + use My::Class; + + $obj = My::Class->new( \%hash ); + +It's possible to modify the constructor and accessors: + + package My::Class; + + use Hash::Wrap { -base => 1, -new => 'new_from_hash', -undef => 1 }; + + 1; + +=head2 Recursive wrapping + +B can automatically wrap nested hashes using the +L option. + +=head3 Using the original hash + +The L option allows mapping nested hashes onto chained +methods, e.g. + + use Hash::Wrap { -recurse => -1, -as => 'recwrap' }; + + my %hash = ( a => { b => { c => 'd' } } ); + + my $wrap = recwrap(\%hash); + + $wrap->a->b->c eq 'd'; # true + +Along the way, B<%hash>, B<$hash{a}>, B<$hash{b}>, B<$hash{c}> are all +blessed into wrapping classes. + +=head3 Copying the original hash + +If L is also specified, then the relationship between the +nested hashes in the original hash and those hashes retrieved by +wrapper methods depends upon what level in the structure has been +wrapped. For example, + + use Hash::Wrap { -recurse => -1, -copy => 1, -as => 'copyrecwrap' }; + use Scalar::Util 'refaddr'; + + my %hash = ( a => { b => { c => 'd' } } ); + + my $wrap = copyrecwrap(\%hash); + + refaddr( $wrap ) != refaddr( \%hash ); + +Because the C<< $wrap->a >> method hasn't been called, then the B<$hash{a}> structure +has yet to be wrapped, so, using C<$wrap> as a hash, + + refaddr( $wrap->{a} ) == refaddr( $hash{a} ); + +However, + + # invoking $wrap->a wraps a copy of $hash{a} because of the -copy + # attribute + refaddr( $wrap->a ) != refaddr( $hash{a} ); + + # so $wrap->{a} is no longer the same as $hash{a}: + refaddr( $wrap->{a} ) != refaddr( $hash{a} ); + refaddr( $wrap->{a} ) == refaddr( $wrap->a ); + +=head3 Importing into an alternative package + +Normally the constructor is installed into the package importing C. +The C<-into> option can change that: + + package This::Package; + use Hash::Wrap { -into => 'Other::Package' }; + +will install B. + +=head1 OPTIONS + +B works at import time. To modify its behavior pass it +options when it is C'd: + + use Hash::Wrap { %options1 }, { %options2 }, ... ; + +Multiple options hashes may be passed; each hash specifies options for +a separate constructor or class. + +For example, + + use Hash::Wrap + { -as => 'cloned', clone => 1}, + { -as => 'copied', copy => 1 }; + +creates two constructors, C and C with different +behaviors. + +=head2 Constructor + +=over + +=item C<-as> => I || C || I || C<-return> + +(This defaults to the string C ) + +If the argument is + +=over + +=item * + +a string (but not the string C<-return>) + +Import the constructor subroutine with the given name. + +=item * + +undefined + +Do not import the constructor. This is usually only used with the +L option. + +=item * + +a scalar ref + +Do not import the constructor. Store a reference to the constructor +into the scalar. + +=item * + +The string C<-return>. + +Do not import the constructor. The constructor subroutine(s) will be +returned from C's C method. This is a fairly +esoteric way of doing things: + + require Hash::Wrap; + ( $copy, $clone ) = Hash::Wrap->import( { -as => '-return', copy => 1 }, + { -as => '-return', clone => 1 } ); + +A list is always returned, even if only one constructor is created. + +=back + +=item C<-copy> => I + +If true, the object will store the data in a I copy of the +hash. By default, the object uses the hash directly. + +=item C<-clone> => I | I + +Store the data in a deep copy of the hash. if I, +L is used. If a coderef, it will be called as + + $clone = $coderef->( $hash ) + +C<$coderef> must return a plain hashref. + +By default, the object uses the hash directly. + +=item C<-lexical> => I + +On Perl v5.38 or higher, this will cause the constructor subroutine to +be installed lexically in the target package. + +On Perls prior to v5.38 this causes an exception. + +=item C<-immutable> => I | I + +If the value is I, the object's attributes and values are locked +and may not be altered. Note that this locks the underlying hash. + +If the value is an array reference, it specifies which attributes are +allowed, I. Attributes which are +not set when the object is created are set to C. For example, + + use Hash::Wrap { -immutable => [ qw( a b c ) ] }; + + my $obj = wrap_hash( { a => 1, b => 2 } ); + + ! defined( $obj->c ) == true; # true statement. + +=item C<-lockkeys> => I | I + +If the value is I, the object's attributes are restricted to the +existing keys in the hash. If it is an array reference, it specifies +which attributes are allowed, I. +The attribute's values are not locked. Note that this locks the +underlying hash. + +=item C<-into> => I + +The name of the package in which to install the constructor. By default +it's that of the caller. + +=back + +=head2 Accessors + +=over + +=item C<-undef> => I + +Normally an attempt to use an accessor for an non-existent key will +result in an exception. This option causes the accessor +to return C instead. It does I create an element in +the hash for the key. + +=item C<-lvalue> => I + +If non-zero, the accessors will be lvalue routines, e.g. they can +change the underlying hash value by assigning to them: + + $obj->attr = 3; + +The hash entry I or this will throw an exception. + +lvalue subroutines are only available on Perl version 5.16 and later. + +If C<-lvalue = 1> this option will silently be ignored on earlier +versions of Perl. + +If C<-lvalue = -1> this option will cause an exception on earlier +versions of Perl. + +=item C<-recurse> => I + +Normally only the top level hash is wrapped in a class. This option +specifies how many levels deep into the hash hashes should be wrapped. +For example, if + + %h = ( l => 0, a => { l => 1, b => { l => 2, c => { l => 3 } } } }; + + use Hash::Wrap { -recurse => 0 }; + $h->l # => 0 + $h->a->l # => ERROR + + use Hash::Wrap { -recurse => 1 }; + $h->l # => 0 + $h->a->l # => 1 + $h->a->b->l # => ERROR + + use Hash::Wrap { -recurse => 2 }; + $h->l # => 0 + $h->a->l # => 1 + $h->a->b->l # => 2 + $h->a->b->c->l # => ERROR + +For infinite recursion, set C<-recurse> to C<-1>. + +Constructors built for deeper hash levels will not heed the +C<-as_scalar_ref>, C<-class>, C<-base>, or C<-as> attributes. + +=back + +=head2 Class + +=over + +=item C<-base> => I + +If true, the enclosing package is converted into a proxy wrapper +class. This should not be used in conjunction with C<-class>. See +L. + +=item C<-class> => I + +A class with the given name will be created and new objects will be +blessed into the specified class by the constructor subroutine. The +new class will not have a constructor method. + +If I is the string C<-target> (or, deprecated, +C<-caller>), then the class name is set to the fully qualified name of +the constructor, e.g. + + package Foo; + use Hash::Wrap { -class => '-target', -as => 'wrap_it' }; + +results in a class name of C. + +If not specified, the class name will be constructed based upon the +options. Do not rely upon this name to determine if an object is +wrapped by B. + +=item C<-new> => I | I + +Add a class constructor method. + +If C<-new> is a true boolean value, the method will be called +C. Otherwise C<-new> specifies the name of the method. + +=back + +=head3 Extra Class Methods + +=over + +=item C<-defined> => I | I + +Add a method which returns true if the passed hash key is defined or +does not exist. If C<-defined> is a true boolean value, the method +will be called C. Otherwise it specifies the name of the +method. For example, + + use Hash::Wrap { -defined => 1 }; + $obj = wrap_hash( { a => 1, b => undef } ); + + $obj->defined( 'a' ); # TRUE + $obj->defined( 'b' ); # FALSE + $obj->defined( 'c' ); # FALSE + +or + + use Hash::Wrap { -defined => 'is_defined' }; + $obj = wrap_hash( { a => 1 } ); + $obj->is_defined( 'a' ); + +=item C<-exists> => I | I + +Add a method which returns true if the passed hash key exists. If +C<-exists> is a boolean, the method will be called +C. Otherwise it specifies the name of the method. For example, + + use Hash::Wrap { -exists => 1 }; + $obj = wrap_hash( { a => 1 } ); + $obj->exists( 'a' ); + +or + + use Hash::Wrap { -exists => 'is_present' }; + $obj = wrap_hash( { a => 1 } ); + $obj->is_present( 'a' ); + +=item C<-predicate> => I + +This adds the more traditionally named predicate methods, such as +C for attribute C. Note that this option makes any +elements which begin with C unavailable via the generated +accessors. + +=item C<-methods> => { I => I, ... } + +Install the passed code references into the class with the specified +names. These override any attributes in the hash. For example, + + use Hash::Wrap { -methods => { a => sub { 'b' } } }; + + $obj = wrap_hash( { a => 'a' } ); + $obj->a; # returns 'b' + +=back + +=head1 WRAPPER CLASSES + +A wrapper class has the following characteristics. + +=over + +=item * + +It has the methods C, C and C. + +=item * + +It will have other methods if the C<-undef> and C<-exists> options are +specified. It may have other methods if it is L. + +=item * + +It will have a constructor if either of C<-base> or C<-new> is specified. + +=back + +=head2 Wrapper Class Limitations + +=over + +=item * + +Wrapper classes have C, C method, and C +methods, which will mask hash keys with the same names. + +=item * + +Classes which are generated without the C<-base> or C<-new> options do +not have a class constructor method, e.g C<< Class->new() >> will +I return a new object. The only way to instantiate them is via +the constructor subroutine generated via B. This allows +the underlying hash to have a C attribute which would otherwise +be masked by the constructor. + +=back + +=head1 LIMITATIONS + +=head2 Lvalue accessors + +Lvalue accessors are available only on Perl 5.16 and later. + +=head2 Accessors for deleted hash elements + +Accessors for deleted elements are not removed. The class's C +method will return C for them, but they are still available in +the class's stash. + +=head2 Wrapping immutable structures + +Locked (e.g. immutable) hashes cannot be blessed into a class. This +will cause B to fail if it is asked to work directly +(without cloning or copying) on a locked hash or recursive wrapping is +specified and the hash contains nested locked hashes. + +To create an immutable B object from an immutable hash, +use the L and L attributes. The L +attribute performs a shallow copy of the hash which is then locked by +L. The default L option will not work, as it +will clone the immutability of the input hash. + +Adding the L option will properly create an immutable +wrapped object when used on locked hashes. It does not suffer the +issue described in L in L. + +=head2 Cloning with recursion + +Cloning by default uses L, which performs a deep clone +of the passed hash. In recursive mode, the clone operation is performed at every +wrapping of a nested hash, causing some data to be repeatedly cloned. +This does not create a memory leak, but it is inefficient. Consider +using L instead of L with L. + +=head1 BUGS + +=head2 Eventual immutability in nested structures + +Immutability is added to mutable nested structures as they are +traversed via method calls. This means that the hash underlying the +wrapper object is not fully immutable until all nested hashes have +been visited via methods. + +For example, + + use Hash::Wrap { -immutable => 1, -recurse => -1, -as 'immutable' }; + + my $wrap = immutable( { a => { b => 2 } } ); + $wrap->{a} = 11; # expected fail: IMMUTABLE + $wrap->{a}{b} = 22; # unexpected success: NOT IMMUTABLE + $wrap->a; + $wrap->{a}{b} = 33; # expected fail: IMMUTABLE; $wrap->{a} is now locked + +=head1 EXAMPLES + +=head2 Existing keys are not compatible with method names + +If a hash key contains characters that aren't legal in method names, +there's no way to access that hash entry. One way around this is to +use a custom clone subroutine which modifies the keys so they are +legal method names. The user can directly insert a non-method-name +key into the C object after it is created, and those still +have a key that's not available via a method, but there's no cure for +that. + +=head1 SEE ALSO + +Here's a comparison of this module and others on CPAN. + +=over + +=item B (this module) + +=over + +=item * core dependencies only + +=item * object tracks additions and deletions of entries in the hash + +=item * optionally applies object paradigm recursively + +=item * accessors may be lvalue subroutines + +=item * accessing a non-existing element via an accessor +throws by default, but can optionally return C + +=item * can use custom package + +=item * can copy/clone existing hash. clone may be customized + +=item * can add additional methods to the hash object's class + +=item * optionally stores the constructor in a scalar + +=item * optionally provides per-attribute predicate methods +(e.g. C) + +=item * optionally provides methods to check an attribute existence or +whether its value is defined + +=item * can create immutable objects + +=back + +=item L + +As you might expect from a DCONWAY module, this does just +about everything you'd like. It has a very heavy set of dependencies. + +=item L + +=over + +=item * core dependencies only + +=item * applies object paradigm recursively + +=item * accessing a non-existing element via an accessor creates it + +=back + +=item L + +=over + +=item * moderate dependency chain (no XS?) + +=item * applies object paradigm recursively + +=item * accessing a non-existing element throws + +=back + +=item L + +=over + +=item * core dependencies only + +=item * only applies object paradigm to top level hash + +=item * can add generic accessor, mutator, and element management methods + +=item * accessing a non-existing element via an accessor creates it +(not documented, but code implies it) + +=item * C doesn't work + +=back + +=item L + +=over + +=item * core dependencies only + +=item * accessing a non-existing element via an accessor returns undef + +=item * applies object paradigm recursively + +=back + +=item L + +=over + +=item * moderate dependency chain. Requires XS, tied hashes + +=item * applies object paradigm recursively + +=item * accessing a non-existing element via an accessor creates it + +=back + +=item L + +=over + +=item * light dependency chain. Requires XS. + +=item * only applies object paradigm to top level hash + +=item * accessing a non-existing element throws, but if an existing +element is accessed, then deleted, accessor returns undef rather than +throwing + +=item * can use custom package + +=back + +=item L + +=over + +=item * uses source filters + +=item * applies object paradigm recursively + +=back + +=item L + +=over + +=item * light dependency chain + +=item * applies object paradigm recursively + +=item * accessing a non-existing element via an accessor creates it + +=back + +=item L + +=over + +=item * core dependencies only + +=item * no documentation + +=back + +=item L + +=over + +=item * core dependencies only + +=item * only applies object paradigm to top level hash + +=item * accessors may be lvalue subroutines + +=item * accessing a non-existing element via an accessor +returns C by default, but can optionally throw. Changing behavior +is done globally, so all objects are affected. + +=item * accessors must be explicitly added. + +=item * accessors may have aliases + +=item * values may be validated + +=item * invoking an accessor may trigger a callback + +=back + +=item L + +=over + +=item * minimal non-core dependencies (L) + +=item * uses L if available + +=item * only applies object paradigm to top level hash + +=item * provides separate getter and predicate methods, but only +for existing keys in hash. + +=item * hash keys are locked. + +=item * operates directly on hash. + +=back + +=item L + +=over + +=item * has a cool name + +=item * core dependencies only + +=item * locks hash by default + +=item * optionally recurses into the hash + +=item * does not track changes to hash + +=item * can destroy class + +=item * can add methods + +=item * can use custom package + +=back + +=back + +=head1 SUPPORT + +=head2 Bugs + +Please report any bugs or feature requests to bug-hash-wrap@rt.cpan.org or through the web interface at: L + +=head2 Source + +Source is available at + + https://codeberg.org/djerius/p5-Hash-Wrap + +and may be cloned from + + https://codeberg.org/djerius/p5-Hash-Wrap.git + +=head1 AUTHOR + +Diab Jerius + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2017 by Smithsonian Astrophysical Observatory. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff --git a/dev/design/hash_wrap_triage_plan.md b/dev/design/hash_wrap_triage_plan.md new file mode 100644 index 000000000..8e834048d --- /dev/null +++ b/dev/design/hash_wrap_triage_plan.md @@ -0,0 +1,211 @@ +# Hash::Wrap `t/as_return.t` — GC-thrash / infinite-loop triage plan + +**Status**: Investigation in progress. PR #536 blocked until this class of failure is resolved. + +## Scope + +Hash::Wrap's `t/as_return.t` (45 lines) and DBIx::Class exhibit the same class of failure: extremely high CPU + memory, no apparent forward progress, wallclock >> real-Perl expectation. User-visible symptom is "stuck" or "timeout". + +This plan picks Hash::Wrap as the minimal reproducer (tight CPAN test, independent of DBIC fixtures). + +## Observations (2026-04-23) + +### Reproducer captured +``` +/Users/fglock/projects/PerlOnJava3/dev/bench/hash_wrap_repro/ + t/as_return.t # 45 lines, copied from Hash-Wrap-1.09 + lib/Hash/Wrap.pm # upstream pure-Perl +``` + +Invoke: +```bash +cd dev/bench/hash_wrap_repro +timeout 30 ../../../jperl -Ilib t/as_return.t +``` + +Baseline: at 15 s the main thread has used 13 s CPU (~89 % of one core — **not** GC-thrash on my machine). Reproduces at 11+ cores on the user's original machine — same code, different GC amplification due to machine/load. Correctness-level reproducer is the same. + +### First bug localised: `B::NULL::next` self-loop + +`jstack` on the stuck process shows the inner loop is: + +``` +java.util.concurrent.ConcurrentHashMap.get(ConcurrentHashMap.java:952) +NameNormalizer.normalizeVariableName(NameNormalizer.java:144) +InheritanceResolver.findMethodInHierarchy(InheritanceResolver.java:310) +Universal.can(Universal.java:175) +RuntimeCode.callCached(RuntimeCode.java:1780) +anon1485.apply(Test2/Util/Sub.pm:577) <-- $op->can('line') / $op->can('next') +``` + +Tracing upward: `Test2::Util::Sub::sub_info` walks the OP tree: + +```perl +my $op = $cobj->START; +while ($op) { + push @all_lines => $op->line if $op->can('line'); + last unless $op->can('next'); # <- termination check + $op = $op->next; +} +``` + +PerlOnJava's `src/main/perl/lib/B.pm` has: + +```perl +package B::NULL { + our @ISA = ('B::OP'); + sub new { bless {}, shift } + sub next { + # NULL is terminal -- return self to prevent infinite loops + return $_[0]; + } +} +``` + +**The comment is inverted.** Returning `$_[0]` keeps `$op` as the same B::NULL forever: + +* `$op->can('line')` → true (inherited from B::OP) +* `$op->can('next')` → true (inherited from B::OP) +* `$op = $op->next` → same B::NULL +* Loop never exits, `@all_lines` grows unboundedly → GC pressure once array outgrows young gen → user sees the 13 GC threads + 25 % useful CPU. + +Hash::Wrap trips this because Test2's structural compare (`meta { prop ... object { call ... } }`) calls `sub_info` on every comparison callback — one infinite loop per check. + +DBIx::Class likely trips the same path (its test suite also uses Test2 deep compare, and DBIC itself uses Sub::Defer / B introspection heavily). + +### Fix for the immediate infinite loop + +Replace `B::NULL::next` with a sentinel that actually terminates the common walker patterns: + +```perl +package B::NULL { + our @ISA = ('B::OP'); + sub new { bless {}, shift } + + # Every method call on B::NULL returns undef (matches real Perl XS). + # Crucially, `$null->next` returning undef terminates while($op) loops. + sub next { return; } + sub line { return; } + # `can('next')` still returns true via B::OP inheritance; the + # caller's `$op = $op->next` sets $op to undef and while($op) exits. +} +``` + +Before landing: audit other B.pm sentinel methods (`sibling`, `targ`, `sibparent`, `first`, `last`, etc.) for the same mistake. + +## Why this is sufficient for Hash::Wrap but not the full class of problem + +The B::NULL fix makes `sub_info` terminate on first invocation. Once it's terminating: + +1. The test proceeds into the actual structural compare. +2. Every `is($obj, meta { ... })` still allocates deep `Test2::Compare::Delta` trees. +3. Each Delta node is a blessed hashref → traverses `RuntimeScalar.setLargeRefCounted`, `MortalList.deferDecrement*`, walker arming etc. +4. This is the *real* distributed-tax problem we already confirmed in Phase R. + +With just the B::NULL fix, Hash::Wrap completes but still runs an order of magnitude slower than real Perl. That may be acceptable for the test-to-pass gate; it is not acceptable for "perf parity". The full plan below addresses both. + +## Plan + +Four phases. Each phase has an explicit measurement gate before moving to the next. + +### Phase 0 — Unblock the test (same-day) + +1. **Fix `B::NULL::next`** and audit other B.pm sentinels (see above). +2. Run Hash::Wrap `t/as_return.t` and `DBIx-Class-0.082844-68/t/storage/base.t` to completion. Record wallclock, CPU ratio, allocation rate via JFR. +3. Acceptance: both complete in finite time, produce TAP with actual pass/fail rather than timeouts. (Pass/fail counts themselves can still regress — that's Phase 1-3 territory.) +4. Commit the fix on `perf/phase-r-needs-cleanup`. + +**Risk**: very low. Change is localised to the B.pm shim. Regression surface: code that relied on `$null->next == $null` for some iteration invariant. No known such code. + +### Phase 1 — Establish allocation baseline + +Goal: turn "slow under GC" from hand-wave into numbers. + +1. JFR run on Hash::Wrap `t/as_return.t`: + ``` + JPERL_OPTS="-XX:+FlightRecorder -XX:StartFlightRecording=\ + filename=dev/bench/results/jfr/hash_wrap.jfr,\ + settings=profile,duration=60s" \ + ./jperl -Ilib t/as_return.t + ``` + Capture `jdk.ObjectAllocationSample` + `jdk.ObjectAllocationInNewTLAB` + `jdk.GCHeapSummary`. + +2. Same run with `JPERL_CLASSIC=1` for the upper bound. + +3. Top allocators (top 10 by bytes): expected candidates are `RuntimeScalar`, `RuntimeHash`, `RuntimeArray`, `MortalList$Entry`, Test2 Delta/Check/Meta classes (pure Perl packages compiled to our anon classes). Record exact numbers in `dev/design/hash_wrap_alloc_profile.md`. + +4. GC metric deltas: young-gen pause %, old-gen promotions/sec, total GC time as % of wallclock. If CLASSIC drops GC time from e.g. 60 % to 10 %, we know our machinery is the allocation driver; if GC stays high under CLASSIC, the allocation source is non-PerlOnJava (upstream Test2 / Hash::Wrap pattern itself). + +**Acceptance gate**: an allocation profile committed under `dev/bench/results/` that clearly identifies the top 3 allocation sites contributing >60 % of bytes. + +### Phase 2 — Reduce allocation at the top-3 sites + +This is concrete engineering work whose scope depends on Phase 1's findings. Candidate targets based on prior profiling work: + +| Candidate | Already known from | Expected impact | +|---|---|---| +| `RuntimeList.add` → `ArrayList.grow` from initial capacity 10 | `life_bitpacked_jfr_profile.md` | 5–14 % on life_bitpacked | +| `MortalList.pending` growth (same `ArrayList.grow` pattern) | `classic_experiment_finding.md` (implicit) | varies with callsite density | +| Per-`my` `MyVarCleanupStack.register` list add | Phase R measured | already captured in `1.49×` | +| Intermediate `RuntimeScalar(integer)` boxing in comparison callbacks | `life_bitpacked_jfr_profile.md` (via `RuntimeScalarCache.getScalarInt`) | unknown for Test2 workload | + +For each chosen target: + +1. Minimal hack that short-circuits the allocation (even if broken) — upper-bound measurement. +2. If upper bound ≥ 5 % wallclock improvement, implement cleanly. +3. If < 5 %, document and move on (Phase 1 Lessons Learned rule). + +**Acceptance gate**: Hash::Wrap wallclock within 5 × real Perl and no test failures beyond pre-existing. + +### Phase 3 — Conditional machinery (the real Phase R) + +`JPERL_CLASSIC=1` proved that removing the machinery globally restores master-era performance. Making the machinery *conditional on need* gives us that speedup without sacrificing DESTROY/weaken correctness. + +Proposal restated here for a fresh reader: + +* One `public boolean needsCleanup` on `RuntimeBase`, default `false`. +* Set to `true` on: `bless` into a class with `DESTROY`, `Scalar::Util::weaken`, closure-capture of a blessed referent (later — first cut only covers the first two). +* Every CLASSIC-gated site becomes `if (!base.needsCleanup) return ;`: + - `RuntimeScalar.setLargeRefCounted` + - `RuntimeScalar.scopeExitCleanup` + - `MortalList.deferDecrementIfTracked` etc. + - `MortalList.scopeExitCleanupHash` / `scopeExitCleanupArray` + - `EmitVariable`: MyVarCleanupStack.register emission (still compile-time gated via `CleanupNeededVisitor`, that stays) + +Test2's `Compare::Delta` nodes are blessed but *don't* have DESTROY — so they land on the fast path. Hash::Wrap's `A1`/`A2` wrappers are blessed but don't have DESTROY — fast path. DBIC's `ResultSet`/`ResultSource` *do* have DESTROY (via `next::can` dispatch under the hood) — slow path, correct. + +**Scope**: ~30 gate sites mapped by the CLASSIC patch. Each call site gets a one-line guard. Core invariant change is on `RuntimeBase` — one new bit. + +**Acceptance gate** (the PR merge gate): + +| Measurement | Gate | +|---|---| +| Hash::Wrap `t/as_return.t` | passes in < 2 × real-Perl wallclock | +| DBIC full suite `./jcpan -t DBIx::Class` | zero timeouts; same pass count as commit `99509c6a0` (13 804 / 13 804) | +| `make test-bundled-modules` | still 176 / 176 | +| `make` unit tests | no new regressions beyond pre-existing `destroy_eval_die.t#4` | +| `life_bitpacked` | Phase R speedup preserved (≥ 1.3 × vs pre-merge baseline) | +| `destroy_eval_die.t` | same pass count (9 / 10 on current branch) | +| DBIx::Class `t/storage/txn_scope_guard.t` | 18 / 18 | + +**Risk**: Medium. Per-object bit is simple in principle; the hard part is ensuring every *entry* into the tracked-object set correctly flips the bit. Fortunately the CLASSIC patch already identifies the gates, so we have a map. + +### Phase 4 — Validation & documentation + +1. Run Phase 3 acceptance gate on a clean machine. Document wallclock/CPU/GC numbers for each benchmark in `dev/bench/results/`. +2. Update `dev/design/perl_parity_plan.md` to reflect Phase R → Phase R+(refcount-by-need) progression. +3. Merge PR #536 once all gates are green. +4. File follow-up tickets for remaining ≤ 5 % per-site optimisations (none are in scope for the merge). + +## Sequence / dependencies + +``` +Phase 0 (immediate fix) ──┐ + ├─▶ Phase 1 (profile) ──▶ Phase 2 (alloc reductions) ──▶ Phase 3 (conditional machinery) ──▶ Phase 4 (validate + merge) +``` + +Phase 0 is the sole prerequisite to unblock `./jcpan -t DBIx::Class` from getting stuck in the infinite loop. Phases 2 and 3 are independent of each other — if Phase 2 alone gets us to the merge gate, Phase 3 can slip to a follow-up PR. + +## Immediate next step + +Apply the B::NULL fix, verify Hash::Wrap completes (doesn't need to *pass*, just complete), commit, rerun `./jcpan -t DBIx::Class` to see whether any tests that were previously timing out now progress to a proper result. diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 88af9d866..26dd95882 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,14 +33,14 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "bd50873f7"; + public static final String gitCommitId = "4a1ad046b"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitDate = "2026-04-22"; + public static final String gitCommitDate = "2026-04-23"; /** * Build timestamp in Perl 5 "Compiled at" format (e.g., "Apr 7 2026 11:20:00"). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 23 2026 09:27:22"; + public static final String buildTimestamp = "Apr 23 2026 10:28:43"; // Prevent instantiation private Configuration() { diff --git a/src/main/perl/lib/B.pm b/src/main/perl/lib/B.pm index 41635b935..39c6e3605 100644 --- a/src/main/perl/lib/B.pm +++ b/src/main/perl/lib/B.pm @@ -307,10 +307,30 @@ package B::NULL { return bless {}, $class; } - sub next { - # NULL is terminal -- return self to prevent infinite loops - return $_[0]; - } + # B::NULL represents the terminal "null op" in an OP chain. Real Perl's + # XS-backed B::NULL returns undef from all accessor methods (via xs magic), + # which is what common optree walkers like Test2::Util::Sub::sub_info rely + # on to detect end-of-chain: + # + # my $op = $cv->START; + # while ($op) { # <- B::NULL must be falsy-returning + # push @lines => $op->line if $op->can('line'); + # last unless $op->can('next'); + # $op = $op->next; # <- must eventually yield undef + # } + # + # Previous implementation returned `$_[0]` (self) from `next`, which kept + # `$op` pinned on B::NULL forever, causing infinite loops and unbounded + # `@all_lines` growth — observable as GC-thrash + apparent hangs in any + # module that introspects sub coderefs (Test2 deep-compare, Hash::Wrap, + # DBIx::Class, Sub::Defer). See dev/design/hash_wrap_triage_plan.md. + sub next { return; } + sub line { return; } + sub file { return; } + sub sibling { return; } + sub first { return; } + sub last { return; } + sub targ { return; } } package B::COP {