diff --git a/dev/bench/hash_wrap_repro/lib/Hash/Wrap.pm b/dev/bench/hash_wrap_repro/lib/Hash/Wrap.pm new file mode 100644 index 000000000..d362afdee --- /dev/null +++ b/dev/bench/hash_wrap_repro/lib/Hash/Wrap.pm @@ -0,0 +1,1627 @@ +package Hash::Wrap; + +# ABSTRACT: create on-the-fly objects from hashes + +use 5.01000; + +use strict; +use warnings; + +use Scalar::Util; +use Digest::MD5; +our $VERSION = '1.09'; + +our @EXPORT = qw[ wrap_hash ]; + +our @CARP_NOT = qw( Hash::Wrap ); +our $DEBUG = 0; + +# copied from Damian Conway's PPR: PerlIdentifier +use constant PerlIdentifier => qr/\A([^\W\d]\w*+)\z/; + +# use builtin::export_lexically if available +use constant HAS_LEXICAL_SUBS => $] >= 5.038; +use if HAS_LEXICAL_SUBS, 'experimental', 'builtin'; +use if HAS_LEXICAL_SUBS, 'builtin'; + +our %REGISTRY; + +sub _croak { + require Carp; + goto \&Carp::croak; +} + +sub _croak_class_method { + my ( $class, $method ) = @_; + $class = ref( $class ) || $class; + _croak( qq[Can't locate class method "$method" via package "$class"] ); +} + +sub _croak_object_method { + my ( $object, $method ) = @_; + my $class = Scalar::Util::blessed( $object ) || ref( $object ) || $object; + _croak( qq[Can't locate object method "$method" via package "$class"] ); +} + +sub _find_symbol { + my ( $package, $symbol, $reftype ) = @_; + + no strict 'refs'; ## no critic (ProhibitNoStrict) + my $candidate = *{"$package\::$symbol"}{SCALAR}; + + return $$candidate + if defined $candidate + && 2 == grep { defined $_->[0] && defined $_->[1] ? $_->[0] eq $_->[1] : 1 } + [ $reftype->[0], Scalar::Util::reftype $candidate ], + [ $reftype->[1], Scalar::Util::reftype $$candidate ]; + + _croak( "Unable to find scalar \$$symbol in class $package" ); +} + +# this is called only if the method doesn't exist. +sub _generate_accessor { + my ( $hash_class, $class, $key ) = @_; + + my %dict = ( + key => $key, + class => $class, + ); + + my $code = $REGISTRY{$hash_class}{accessor_template}; + my $coderef = _compile_from_tpl( \$code, \%dict ); + _croak_about_code( \$code, 'accessor' ) + if $@; + + return $coderef; +} + +sub _generate_predicate { + my ( $hash_class, $class, $key ) = @_; + + my %dict = ( + key => $key, + class => $class, + ); + + my $code = $REGISTRY{$hash_class}{predicate_template}; + my $coderef = _compile_from_tpl( \$code, \%dict ); + _croak_about_code( \$code, 'predicate' ) + if $@; + + return $coderef; +} + +sub _autoload { + my ( $hash_class, $method, $object ) = @_; + + my ( $class, $key ) = $method =~ /(.*)::(.*)/; + + _croak_class_method( $object, $key ) + unless Scalar::Util::blessed( $object ); + + if ( exists $REGISTRY{$hash_class}{predicate_template} + && $key =~ /^has_(.*)/ ) + { + return _generate_predicate( $hash_class, $class, $1 ); + } + + _croak_object_method( $object, $key ) + unless $REGISTRY{$hash_class}{validate}->( $object, $key ); + + _generate_accessor( $hash_class, $class, $key ); +} + +sub _can { + my ( $self, $key, $CLASS ) = @_; + + my $class = Scalar::Util::blessed( $self ); + return () if !defined $class; + + if ( !exists $self->{$key} ) { + + if ( exists $Hash::Wrap::REGISTRY{$class}{methods}{$key} ) { + ## no critic (ProhibitNoStrict) + no strict 'refs'; + my $method = "${class}::$key"; + return *{$method}{CODE}; + } + return (); + } + + my $method = "${class}::$key"; + + ## no critic (ProhibitNoStrict PrivateSubs) + no strict 'refs'; + return *{$method}{CODE} + || Hash::Wrap::_generate_accessor( $CLASS, $class, $key ); +} + +sub import { ## no critic(ExcessComplexity) + shift; + + my @imports = @_; + push @imports, @EXPORT unless @imports; + + my @return; + + for my $args ( @imports ) { + if ( !ref $args ) { + _croak( "$args is not exported by ", __PACKAGE__ ) + unless grep { /$args/ } @EXPORT; ## no critic (BooleanGrep) + + $args = { -as => $args }; + } + + elsif ( 'HASH' ne ref $args ) { + _croak( 'argument to ', __PACKAGE__, '::import must be string or hash' ) + unless grep { /$args/ } @EXPORT; ## no critic (BooleanGrep) + } + else { + # make a copy as it gets modified later on + $args = {%$args}; + } + + _croak( 'cannot mix -base and -class' ) + if !!$args->{-base} && exists $args->{-class}; + + $DEBUG = $ENV{HASH_WRAP_DEBUG} // delete $args->{-debug}; + + # -as may be explicitly 'undef' to indicate use in a standalone class + $args->{-as} = 'wrap_hash' unless exists $args->{-as}; + my $name = delete $args->{-as}; + + my $target = delete $args->{-into} // caller; + + if ( defined $name ) { + + if ( defined( my $reftype = Scalar::Util::reftype( $name ) ) ) { + _croak( '-as must be undefined or a string or a reference to a scalar' ) + if $reftype ne 'SCALAR' + && $reftype ne 'VSTRING' + && $reftype ne 'REF' + && $reftype ne 'GLOB' + && $reftype ne 'LVALUE' + && $reftype ne 'REGEXP'; + + $args->{-as_scalar_ref} = $name; + + } + + elsif ( $name eq '-return' ) { + $args->{-as_return} = 1; + } + } + + if ( $args->{-base} ) { + _croak( q{don't use -as => -return with -base} ) + if $args->{-as_return}; + $args->{-class} = $target; + $args->{-new} = 1 if !exists $args->{-new}; + _build_class( $target, $name, $args ); + } + + else { + _build_class( $target, $name, $args ); + if ( defined $name ) { + my $sub = _build_constructor( $target, $name, $args ); + if ( $args->{-as_return} ) { + push @return, $sub; + } + elsif ( $args->{-lexical} ) { + _croak( "Perl >= v5.38 is required for -lexical; current perl is $^V" ) + unless HAS_LEXICAL_SUBS; + builtin::export_lexically( $name, $sub ); + } + } + } + + # clean out known attributes + delete @{$args}{ + qw[ -as -as_return -as_scalar_ref -base -class -clone + -copy -defined -exists -immutable -lexical -lockkeys -lvalue + -methods -new -predicate -recurse -undef ] + }; + + if ( keys %$args ) { + _croak( 'unknown options passed to ', __PACKAGE__, '::import: ', join( ', ', keys %$args ) ); + } + } + + return @return; +} + +sub _build_class { ## no critic(ExcessComplexity) + my ( $target, $name, $attr ) = @_; + + # in case we're called inside a recursion and the recurse count + # has hit zero, default behavior is no recurse, so remove it so + # the attr signature computed below isn't contaminated by a + # useless -recurse => 0 attribute. + if ( exists $attr->{-recurse} ) { + _croak( '-recurse must be a number' ) + unless Scalar::Util::looks_like_number( $attr->{-recurse} ); + delete $attr->{-recurse} if $attr->{-recurse} == 0; + } + + if ( !defined $attr->{-class} ) { + + ## no critic (ComplexMappings) + my @class = map { + ( my $key = $_ ) =~ s/-//; + ( $key, defined $attr->{$_} ? $attr->{$_} : '' ) + } sort keys %$attr; + + $attr->{-class} = join q{::}, 'Hash::Wrap::Class', Digest::MD5::md5_hex( @class ); + } + + elsif ( $attr->{-class} eq '-target' || $attr->{-class} eq '-caller' ) { + _croak( "can't set -class => '@{[ $attr->{-class} ]}' if '-as' is not a plain string" ) + if ref $name; + $attr->{-class} = $target . q{::} . $name; + } + + my $class = $attr->{-class}; + + return $class if defined $REGISTRY{$class}; + my $rentry = $REGISTRY{$class} = { methods => {} }; + + my %closures; + my @BODY; + my %dict = ( + class => $class, + signature => q{}, + body => \@BODY, + autoload_attr => q{}, + validate_inline => 'exists $self->{\<>}', + validate_method => 'exists $self->{$key}', + set => '$self->{q[\<>]} = $_[0] if @_;', + return_value => '$self->{q[\<>]}', + recursion_constructor => q{}, + predicate_template => q{}, + ); + + if ( $attr->{-lvalue} ) { + if ( $] lt '5.016000' ) { + _croak( 'lvalue accessors require Perl 5.16 or later' ) + if $attr->{-lvalue} < 0; + } + else { + $dict{autoload_attr} = q[: lvalue]; + $dict{signature} = q[: lvalue]; + } + } + + if ( $attr->{-undef} ) { + $dict{validate_method} = q[ 1 ]; + $dict{validate_inline} = q[ 1 ]; + } + + if ( $attr->{-exists} ) { + $dict{exists} = $attr->{-exists} =~ PerlIdentifier ? $1 : 'exists'; + push @BODY, q[ sub <> { exists $_[0]->{$_[1] } } ]; + $rentry->{methods}{ $dict{exists} } = undef; + } + + if ( $attr->{-defined} ) { + $dict{defined} = $attr->{-defined} =~ PerlIdentifier ? $1 : 'defined'; + push @BODY, q[ sub <> { defined $_[0]->{$_[1] } } ]; + $rentry->{methods}{ $dict{defined} } = undef; + } + + if ( $attr->{-immutable} ) { + $dict{set} = <<'END'; + Hash::Wrap::_croak( q[Modification of a read-only value attempted]) + if @_; +END + } + + if ( $attr->{-recurse} ) { + + # decrement recursion limit. It's infinite recursion if + # -recurse < 0; always set to -1 so we keep using the same + # class. Note that -recurse will never be zero upon entrance + # of this block, as -recurse => 0 is removed from the + # attributes way upstream. + + $dict{recurse_limit} = --$attr->{-recurse} < 0 ? -1 : $attr->{-recurse}; + + $dict{quoted_key} = 'q[\<>]'; + $dict{hash_value} = '$self->{<>}'; + + $dict{recurse_wrap_hash} = '$<>::recurse_into_hash->( <> )'; + + $dict{return_value} = <<'END'; + 'HASH' eq (Scalar::Util::reftype( <> ) // q{}) + && ! Scalar::Util::blessed( <> ) + ? <> + : <>; +END + if ( $attr->{-copy} ) { + + if ( $attr->{-immutable} ) { + $dict{wrap_hash_entry} = <<'END'; + do { Hash::Util::unlock_ref_value( $self, <> ); + <> = <>; + Hash::Util::lock_ref_value( $self, <> ); + <>; + } +END + } + else { + $dict{wrap_hash_entry} = '<> = <>'; + } + + } + else { + $dict{wrap_hash_entry} = '<>'; + } + + # do a two-step initialization of the constructor. If + # the initialization sub is stored in $recurse_into_hash, and then + # $recurse_into_hash is set to the actual constructor I worry that + # Perl may decide to garbage collect the setup subroutine while it's + # busy setting $recurse_into_hash. So, store the + # initialization sub in something other than $recurse_into_hash. + + $dict{recursion_constructor} = <<'END'; +our $recurse_into_hash; +our $setup_recurse_into_hash = sub { + require Hash::Wrap; + ( $recurse_into_hash ) = Hash::Wrap->import ( { %$attr, -as => '-return', + -recurse => <> } ); + goto &$recurse_into_hash; +}; +$recurse_into_hash = $setup_recurse_into_hash; +END + + my %attr = ( %$attr, -recurse => --$attr->{-recurse} < 0 ? -1 : $attr->{-recurse}, ); + delete @attr{qw( -as_scalar_ref -class -base -as )}; + $closures{'$attr'} = \%attr; + } + + if ( $attr->{-predicate} ) { + $dict{predicate_template} = <<'END'; +our $predicate_template = q[ + package \<>; + + use Scalar::Util (); + + sub has_\<> { + my $self = shift; + + Hash::Wrap::_croak_class_method( $self, 'has_\<>' ) + unless Scalar::Util::blessed( $self ); + + return exists $self->{\<>}; + } + + $Hash::Wrap::REGISTRY{methods}{'has_\<>'} = undef; + + \&has_\<>; +]; +END + } + + my $class_template = <<'END'; +package <>; + +<> + +use Scalar::Util (); + +our $validate = sub { + my ( $self, $key ) = @_; + return <>; +}; + +<> + +our $accessor_template = q[ + package \<>; + + use Scalar::Util (); + + sub \<> <> { + my $self = shift; + + Hash::Wrap::_croak_class_method( $self, '\<>' ) + unless Scalar::Util::blessed( $self ); + + Hash::Wrap::_croak_object_method( $self, '\<>' ) + unless ( <> ); + + <> + + return <>; + } + \&\<>; +]; + +<> + + +<> + +our $AUTOLOAD; +sub AUTOLOAD <> { + goto &{ Hash::Wrap::_autoload( q[<>], $AUTOLOAD, $_[0] ) }; +} + +sub DESTROY { } + +sub can { + return Hash::Wrap::_can( @_, q[<>] ); +} + +1; +END + + _compile_from_tpl( \$class_template, \%dict, keys %closures ? \%closures : () ) + or _croak_about_code( \$class_template, "class $class" ); + + if ( !!$attr->{-new} ) { + my $lname = $attr->{-new} =~ PerlIdentifier ? $1 : 'new'; + _build_constructor( $class, $lname, { %$attr, -as_method => 1 } ); + } + + if ( $attr->{-methods} ) { + + my $methods = $attr->{-methods}; + _croak( '-methods option value must be a hashref' ) + unless 'HASH' eq ref $methods; + + for my $mth ( keys %$methods ) { + _croak( "method name '$mth' is not a valid Perl identifier" ) + if $mth !~ PerlIdentifier; + + my $code = $methods->{$mth}; + _croak( qq{value for method "$mth" must be a coderef} ) + unless 'CODE' eq ref $code; + no strict 'refs'; ## no critic (ProhibitNoStrict) + *{"${class}::${mth}"} = $code; + } + + $rentry->{methods}{$_} = undef for keys %$methods; + } + + push @CARP_NOT, $class; + $rentry->{accessor_template} + = _find_symbol( $class, 'accessor_template', [ 'SCALAR', undef ] ); + + if ( $attr->{-predicate} ) { + $rentry->{predicate_template} + = _find_symbol( $class, 'predicate_template', [ 'SCALAR', undef ] ); + } + + $rentry->{validate} = _find_symbol( $class, 'validate', [ 'REF', 'CODE' ] ); + + Scalar::Util::weaken( $rentry->{validate} ); + + return $class; +} + +sub _build_constructor { ## no critic (ExcessComplexity) + my ( $package, $name, $args ) = @_; + + # closure for user provided clone sub + my %closures; + + _croak( 'cannot mix -copy and -clone' ) + if exists $args->{-copy} && exists $args->{-clone}; + + my @USE; + my %dict = ( + package => $package, + constructor_name => $name, + use => \@USE, + package_return_value => '1;', + ); + + $dict{class} + = $args->{-as_method} + ? 'shift;' + : 'q[' . $args->{-class} . '];'; + + my @copy = ( + 'Hash::Wrap::_croak(q{the argument to <>::<> must not be an object})', + ' if Scalar::Util::blessed( $hash );', + ); + + if ( $args->{-copy} ) { + push @copy, '$hash = { %{ $hash } };'; + } + + elsif ( exists $args->{-clone} ) { + + if ( 'CODE' eq ref $args->{-clone} ) { + $closures{'clone'} = $args->{-clone}; + # overwrite @copy, as the clone sub could take an object. + @copy = ( + 'state $clone = $CLOSURES->{clone};', + '$hash = $clone->($hash);', + 'Hash::Wrap::_croak(q{the custom clone routine for <> returned an object instead of a plain hash})', + ' if Scalar::Util::blessed( $hash );', + ); + } + else { + push @USE, q[use Storable ();]; + push @copy, '$hash = Storable::dclone $hash;'; + } + } + + $dict{copy} = join "\n", @copy; + + $dict{lock} = do { + my @eval; + + if ( defined( my $opts = $args->{-immutable} || undef ) ) { + + push @USE, q[use Hash::Util ();]; + + if ( 'ARRAY' eq ref $opts ) { + _croak( "-immutable: attribute name ($_) is not a valid Perl identifier" ) + for grep { $_ !~ PerlIdentifier } @{$opts}; + + push @eval, + 'Hash::Util::lock_keys_plus(%$hash, qw{ ' . join( q{ }, @{$opts} ) . ' });', + '@{$hash}{Hash::Util::hidden_keys(%$hash)} = ();', + ; + } + + push @eval, 'Hash::Util::lock_hash(%$hash)'; + } + elsif ( defined( $opts = $args->{-lockkeys} || undef ) ) { + + push @USE, q[use Hash::Util ();]; + + if ( 'ARRAY' eq ref $args->{-lockkeys} ) { + _croak( "-lockkeys: attribute name ($_) is not a valid Perl identifier" ) + for grep { $_ !~ PerlIdentifier } @{ $args->{-lockkeys} }; + + push @eval, + 'Hash::Util::lock_keys_plus(%$hash, qw{ ' . join( q{ }, @{ $args->{-lockkeys} } ) . ' });'; + } + elsif ( $args->{-lockkeys} ) { + + push @eval, 'Hash::Util::lock_keys(%$hash)'; + } + } + + join( "\n", @eval ); + + }; + + # return the constructor sub from the factory and don't insert the + # name into the package namespace + if ( $args->{-as_scalar_ref} || $args->{-as_return} || $args->{-lexical} ) { + $dict{package_return_value} = q{}; + $dict{constructor_name} = q{}; + } + + #<<< no tidy + my $code = <<'ENDCODE'; + package <>; + + <> + use Scalar::Util (); + + no warnings 'redefine'; + + sub <> (;$) { + my $class = <> + my $hash = shift // {}; + + Hash::Wrap::_croak( 'argument to <>::<> must be a hashref' ) + if 'HASH' ne Scalar::Util::reftype($hash); + <> + bless $hash, $class; + <> + } + <> + +ENDCODE + #>>> + + my $result = _compile_from_tpl( \$code, \%dict, keys %closures ? \%closures : () ) + || _croak_about_code( \$code, "constructor (as $name) subroutine" ); + + # caller asked for a coderef to be stuffed into a scalar + ${$name} = $result if $args->{-as_scalar_ref}; + return $result; +} + +sub _croak_about_code { + my ( $code, $what, $error ) = @_; + $error //= $@; + _line_number_code( $code ); + _croak( qq[error compiling $what: $error\n$$code] ); +} + +sub _line_number_code { + my ( $code ) = @_; + chomp( $$code ); + $$code .= "\n"; + my $space = length( $$code =~ tr/\n// ); + my $line = 0; + $$code =~ s/^/sprintf "%${space}d: ", ++$line/emg; +} + +sub _compile_from_tpl { + my ( $code, $dict, $closures ) = @_; + + if ( defined $closures && %$closures ) { + + # add code to create lexicals if the keys begin with a q{$} + $dict->{closures} = join( "\n", + map { "my $_ = \$CLOSURES->{'$_'};" } + grep { substr( $_, 0, 1 ) eq q{$} } + keys %$closures ); + } + + _interpolate( $code, $dict ); + + if ( $DEBUG ) { + my $lcode = $$code; + _line_number_code( \$lcode ); + print STDERR $lcode; + } + + _clean_eval( $code, exists $dict->{closures} ? $closures : () ); + +} + +# eval in a clean lexical space. +sub _clean_eval { + ## no critic (StringyEval RequireCheckingReturnValueOfEval ) + if ( @_ > 1 ) { + ## no critic (UnusedVars) + my $CLOSURES = $_[1]; + eval( ${ $_[0] } ); + } + else { + eval( ${ $_[0] } ); + } + +} + +sub _interpolate { + my ( $tpl, $dict, $work ) = @_; + $work = { loop => {} } unless defined $work; + + $$tpl =~ s{(\\)?\<\<(\w+)\>\> + }{ + if ( defined $1 ) { + "<<$2>>"; + } + else { + my $key = lc $2; + my $v = $dict->{$key}; + if ( defined $v ) { + $v = join( "\n", @$v ) + if 'ARRAY' eq ref $v; + + _croak( "circular interpolation loop detected for $key" ) + if $work->{loop}{$key}++; + _interpolate( \$v, $dict, $work ); + --$work->{loop}{$key}; + $v; + } + else { + q{}; + } + } + }gex; + return; +} + +1; + +# +# This file is part of Hash-Wrap +# +# This software is Copyright (c) 2017 by Smithsonian Astrophysical Observatory. +# +# This is free software, licensed under: +# +# The GNU General Public License, Version 3, June 2007 +# + +__END__ + +=pod + +=for :stopwords Diab Jerius Smithsonian Astrophysical Observatory getter + +=head1 NAME + +Hash::Wrap - create on-the-fly objects from hashes + +=head1 VERSION + +version 1.09 + +=head1 SYNOPSIS + + use Hash::Wrap; + + my $result = wrap_hash( { a => 1 } ); + print $result->a; # prints + print $result->b; # throws + + # import two constructors, and with different behaviors. + use Hash::Wrap + { -as => 'cloned', clone => 1}, + { -as => 'copied', copy => 1 }; + + my $cloned = cloned( { a => 1 } ); + print $cloned->a; + + my $copied = copied( { a => 1 } ); + print $copied->a; + + # don't pollute your namespace + my $wrap; + use Hash::Wrap { -as => \$wrap}; + my $obj = $wrap->( { a => 1 } ); + + # apply constructors to hashes two levels deep into the hash + use Hash::Wrap { -recurse => 2 }; + + # apply constructors to hashes at any level + use Hash::Wrap { -recurse => -1 }; + +=head1 DESCRIPTION + +B creates objects from hashes, providing accessors for +hash elements. The objects are hashes, and may be modified using the +standard Perl hash operations and the object's accessors will behave +accordingly. + +Why use this class? Sometimes a hash is created on the fly and it's too +much of a hassle to build a class to encapsulate it. + + sub foo () { ... ; return { a => 1 }; } + +With C: + + use Hash::Wrap; + + sub foo () { ... ; return wrap_hash( { a => 1 ); } + + my $obj = foo (); + print $obj->a; + +Elements can be added or removed to the object and accessors will +track them. The object may be made immutable, or may have a restricted +set of attributes. + +There are many similar modules on CPAN (see L for comparisons). + +What sets B apart is that it's possible to customize +object construction and accessor behavior: + +=over + +=item * + +It's possible to use the passed hash directly, or make shallow or deep +copies of it. + +=item * + +Accessors can be customized so that accessing a non-existent element +can throw an exception or return the undefined value. + +=item * + +On recent enough versions of Perl, accessors can be lvalues, e.g. + + $obj->existing_key = $value; + +=back + +=head1 USAGE + +=head2 Simple Usage + +C'ing B without options imports a subroutine called +B which takes a hash, blesses it into a wrapper class and +returns the hash: + + use Hash::Wrap; + + my $h = wrap_hash { a => 1 }; + print $h->a, "\n"; # prints 1 + +B<[API change @ v1.0]> +The passed hash must be a plain hash (i.e. not an object or blessed +hash). To pass an object, you must specify a custom clone subroutine +returning a plain hashref via the L option. + +The wrapper class has no constructor method, so the only way to create +an object is via the B subroutine. (See L +for more about wrapper classes) If B is called without +arguments, it will create a hash for you. + +=head2 Advanced Usage + +=head3 B is an awful name for the constructor subroutine + +So rename it: + + use Hash::Wrap { -as => "a_much_better_name_for_wrap_hash" }; + + $obj = a_much_better_name_for_wrap_hash( { a => 1 } ); + +=head3 The Wrapper Class name matters + +If the class I matters, but it'll never be instantiated +except via the imported constructor subroutine: + + use Hash::Wrap { -class => 'My::Class' }; + + my $h = wrap_hash { a => 1 }; + print $h->a, "\n"; # prints 1 + $h->isa( 'My::Class' ); # returns true + +or, if you want it to reflect the current package, try this: + + package Foo; + use Hash::Wrap { -class => '-target', -as => 'wrapit' }; + + my $h = wrapit { a => 1 }; + $h->isa( 'Foo::wrapit' ); # returns true + +Again, the wrapper class has no constructor method, so the only way to +create an object is via the generated subroutine. + +=head3 The Wrapper Class needs its own class constructor method + +To generate a wrapper class which can be instantiated via its own +constructor method: + + use Hash::Wrap { -class => 'My::Class', -new => 1 }; + +The default B constructor subroutine is still exported, so + + $h = My::Class->new( { a => 1 } ); + +and + + $h = wrap_hash( { a => 1 } ); + +do the same thing. + +To give the constructor method a different name: + + use Hash::Wrap { -class => 'My::Class', -new => '_my_new' }; + +To prevent the constructor subroutine from being imported: + + use Hash::Wrap { -as => undef, -class => 'My::Class', -new => 1 }; + +=head3 A stand alone Wrapper Class + +To create a stand alone wrapper class, + + package My::Class; + + use Hash::Wrap { -base => 1 }; + + 1; + +And later... + + use My::Class; + + $obj = My::Class->new( \%hash ); + +It's possible to modify the constructor and accessors: + + package My::Class; + + use Hash::Wrap { -base => 1, -new => 'new_from_hash', -undef => 1 }; + + 1; + +=head2 Recursive wrapping + +B can automatically wrap nested hashes using the +L option. + +=head3 Using the original hash + +The L option allows mapping nested hashes onto chained +methods, e.g. + + use Hash::Wrap { -recurse => -1, -as => 'recwrap' }; + + my %hash = ( a => { b => { c => 'd' } } ); + + my $wrap = recwrap(\%hash); + + $wrap->a->b->c eq 'd'; # true + +Along the way, B<%hash>, B<$hash{a}>, B<$hash{b}>, B<$hash{c}> are all +blessed into wrapping classes. + +=head3 Copying the original hash + +If L is also specified, then the relationship between the +nested hashes in the original hash and those hashes retrieved by +wrapper methods depends upon what level in the structure has been +wrapped. For example, + + use Hash::Wrap { -recurse => -1, -copy => 1, -as => 'copyrecwrap' }; + use Scalar::Util 'refaddr'; + + my %hash = ( a => { b => { c => 'd' } } ); + + my $wrap = copyrecwrap(\%hash); + + refaddr( $wrap ) != refaddr( \%hash ); + +Because the C<< $wrap->a >> method hasn't been called, then the B<$hash{a}> structure +has yet to be wrapped, so, using C<$wrap> as a hash, + + refaddr( $wrap->{a} ) == refaddr( $hash{a} ); + +However, + + # invoking $wrap->a wraps a copy of $hash{a} because of the -copy + # attribute + refaddr( $wrap->a ) != refaddr( $hash{a} ); + + # so $wrap->{a} is no longer the same as $hash{a}: + refaddr( $wrap->{a} ) != refaddr( $hash{a} ); + refaddr( $wrap->{a} ) == refaddr( $wrap->a ); + +=head3 Importing into an alternative package + +Normally the constructor is installed into the package importing C. +The C<-into> option can change that: + + package This::Package; + use Hash::Wrap { -into => 'Other::Package' }; + +will install B. + +=head1 OPTIONS + +B works at import time. To modify its behavior pass it +options when it is C'd: + + use Hash::Wrap { %options1 }, { %options2 }, ... ; + +Multiple options hashes may be passed; each hash specifies options for +a separate constructor or class. + +For example, + + use Hash::Wrap + { -as => 'cloned', clone => 1}, + { -as => 'copied', copy => 1 }; + +creates two constructors, C and C with different +behaviors. + +=head2 Constructor + +=over + +=item C<-as> => I || C || I || C<-return> + +(This defaults to the string C ) + +If the argument is + +=over + +=item * + +a string (but not the string C<-return>) + +Import the constructor subroutine with the given name. + +=item * + +undefined + +Do not import the constructor. This is usually only used with the +L option. + +=item * + +a scalar ref + +Do not import the constructor. Store a reference to the constructor +into the scalar. + +=item * + +The string C<-return>. + +Do not import the constructor. The constructor subroutine(s) will be +returned from C's C method. This is a fairly +esoteric way of doing things: + + require Hash::Wrap; + ( $copy, $clone ) = Hash::Wrap->import( { -as => '-return', copy => 1 }, + { -as => '-return', clone => 1 } ); + +A list is always returned, even if only one constructor is created. + +=back + +=item C<-copy> => I + +If true, the object will store the data in a I copy of the +hash. By default, the object uses the hash directly. + +=item C<-clone> => I | I + +Store the data in a deep copy of the hash. if I, +L is used. If a coderef, it will be called as + + $clone = $coderef->( $hash ) + +C<$coderef> must return a plain hashref. + +By default, the object uses the hash directly. + +=item C<-lexical> => I + +On Perl v5.38 or higher, this will cause the constructor subroutine to +be installed lexically in the target package. + +On Perls prior to v5.38 this causes an exception. + +=item C<-immutable> => I | I + +If the value is I, the object's attributes and values are locked +and may not be altered. Note that this locks the underlying hash. + +If the value is an array reference, it specifies which attributes are +allowed, I. Attributes which are +not set when the object is created are set to C. For example, + + use Hash::Wrap { -immutable => [ qw( a b c ) ] }; + + my $obj = wrap_hash( { a => 1, b => 2 } ); + + ! defined( $obj->c ) == true; # true statement. + +=item C<-lockkeys> => I | I + +If the value is I, the object's attributes are restricted to the +existing keys in the hash. If it is an array reference, it specifies +which attributes are allowed, I. +The attribute's values are not locked. Note that this locks the +underlying hash. + +=item C<-into> => I + +The name of the package in which to install the constructor. By default +it's that of the caller. + +=back + +=head2 Accessors + +=over + +=item C<-undef> => I + +Normally an attempt to use an accessor for an non-existent key will +result in an exception. This option causes the accessor +to return C instead. It does I create an element in +the hash for the key. + +=item C<-lvalue> => I + +If non-zero, the accessors will be lvalue routines, e.g. they can +change the underlying hash value by assigning to them: + + $obj->attr = 3; + +The hash entry I or this will throw an exception. + +lvalue subroutines are only available on Perl version 5.16 and later. + +If C<-lvalue = 1> this option will silently be ignored on earlier +versions of Perl. + +If C<-lvalue = -1> this option will cause an exception on earlier +versions of Perl. + +=item C<-recurse> => I + +Normally only the top level hash is wrapped in a class. This option +specifies how many levels deep into the hash hashes should be wrapped. +For example, if + + %h = ( l => 0, a => { l => 1, b => { l => 2, c => { l => 3 } } } }; + + use Hash::Wrap { -recurse => 0 }; + $h->l # => 0 + $h->a->l # => ERROR + + use Hash::Wrap { -recurse => 1 }; + $h->l # => 0 + $h->a->l # => 1 + $h->a->b->l # => ERROR + + use Hash::Wrap { -recurse => 2 }; + $h->l # => 0 + $h->a->l # => 1 + $h->a->b->l # => 2 + $h->a->b->c->l # => ERROR + +For infinite recursion, set C<-recurse> to C<-1>. + +Constructors built for deeper hash levels will not heed the +C<-as_scalar_ref>, C<-class>, C<-base>, or C<-as> attributes. + +=back + +=head2 Class + +=over + +=item C<-base> => I + +If true, the enclosing package is converted into a proxy wrapper +class. This should not be used in conjunction with C<-class>. See +L. + +=item C<-class> => I + +A class with the given name will be created and new objects will be +blessed into the specified class by the constructor subroutine. The +new class will not have a constructor method. + +If I is the string C<-target> (or, deprecated, +C<-caller>), then the class name is set to the fully qualified name of +the constructor, e.g. + + package Foo; + use Hash::Wrap { -class => '-target', -as => 'wrap_it' }; + +results in a class name of C. + +If not specified, the class name will be constructed based upon the +options. Do not rely upon this name to determine if an object is +wrapped by B. + +=item C<-new> => I | I + +Add a class constructor method. + +If C<-new> is a true boolean value, the method will be called +C. Otherwise C<-new> specifies the name of the method. + +=back + +=head3 Extra Class Methods + +=over + +=item C<-defined> => I | I + +Add a method which returns true if the passed hash key is defined or +does not exist. If C<-defined> is a true boolean value, the method +will be called C. Otherwise it specifies the name of the +method. For example, + + use Hash::Wrap { -defined => 1 }; + $obj = wrap_hash( { a => 1, b => undef } ); + + $obj->defined( 'a' ); # TRUE + $obj->defined( 'b' ); # FALSE + $obj->defined( 'c' ); # FALSE + +or + + use Hash::Wrap { -defined => 'is_defined' }; + $obj = wrap_hash( { a => 1 } ); + $obj->is_defined( 'a' ); + +=item C<-exists> => I | I + +Add a method which returns true if the passed hash key exists. If +C<-exists> is a boolean, the method will be called +C. Otherwise it specifies the name of the method. For example, + + use Hash::Wrap { -exists => 1 }; + $obj = wrap_hash( { a => 1 } ); + $obj->exists( 'a' ); + +or + + use Hash::Wrap { -exists => 'is_present' }; + $obj = wrap_hash( { a => 1 } ); + $obj->is_present( 'a' ); + +=item C<-predicate> => I + +This adds the more traditionally named predicate methods, such as +C for attribute C. Note that this option makes any +elements which begin with C unavailable via the generated +accessors. + +=item C<-methods> => { I => I, ... } + +Install the passed code references into the class with the specified +names. These override any attributes in the hash. For example, + + use Hash::Wrap { -methods => { a => sub { 'b' } } }; + + $obj = wrap_hash( { a => 'a' } ); + $obj->a; # returns 'b' + +=back + +=head1 WRAPPER CLASSES + +A wrapper class has the following characteristics. + +=over + +=item * + +It has the methods C, C and C. + +=item * + +It will have other methods if the C<-undef> and C<-exists> options are +specified. It may have other methods if it is L. + +=item * + +It will have a constructor if either of C<-base> or C<-new> is specified. + +=back + +=head2 Wrapper Class Limitations + +=over + +=item * + +Wrapper classes have C, C method, and C +methods, which will mask hash keys with the same names. + +=item * + +Classes which are generated without the C<-base> or C<-new> options do +not have a class constructor method, e.g C<< Class->new() >> will +I return a new object. The only way to instantiate them is via +the constructor subroutine generated via B. This allows +the underlying hash to have a C attribute which would otherwise +be masked by the constructor. + +=back + +=head1 LIMITATIONS + +=head2 Lvalue accessors + +Lvalue accessors are available only on Perl 5.16 and later. + +=head2 Accessors for deleted hash elements + +Accessors for deleted elements are not removed. The class's C +method will return C for them, but they are still available in +the class's stash. + +=head2 Wrapping immutable structures + +Locked (e.g. immutable) hashes cannot be blessed into a class. This +will cause B to fail if it is asked to work directly +(without cloning or copying) on a locked hash or recursive wrapping is +specified and the hash contains nested locked hashes. + +To create an immutable B object from an immutable hash, +use the L and L attributes. The L +attribute performs a shallow copy of the hash which is then locked by +L. The default L option will not work, as it +will clone the immutability of the input hash. + +Adding the L option will properly create an immutable +wrapped object when used on locked hashes. It does not suffer the +issue described in L in L. + +=head2 Cloning with recursion + +Cloning by default uses L, which performs a deep clone +of the passed hash. In recursive mode, the clone operation is performed at every +wrapping of a nested hash, causing some data to be repeatedly cloned. +This does not create a memory leak, but it is inefficient. Consider +using L instead of L with L. + +=head1 BUGS + +=head2 Eventual immutability in nested structures + +Immutability is added to mutable nested structures as they are +traversed via method calls. This means that the hash underlying the +wrapper object is not fully immutable until all nested hashes have +been visited via methods. + +For example, + + use Hash::Wrap { -immutable => 1, -recurse => -1, -as 'immutable' }; + + my $wrap = immutable( { a => { b => 2 } } ); + $wrap->{a} = 11; # expected fail: IMMUTABLE + $wrap->{a}{b} = 22; # unexpected success: NOT IMMUTABLE + $wrap->a; + $wrap->{a}{b} = 33; # expected fail: IMMUTABLE; $wrap->{a} is now locked + +=head1 EXAMPLES + +=head2 Existing keys are not compatible with method names + +If a hash key contains characters that aren't legal in method names, +there's no way to access that hash entry. One way around this is to +use a custom clone subroutine which modifies the keys so they are +legal method names. The user can directly insert a non-method-name +key into the C object after it is created, and those still +have a key that's not available via a method, but there's no cure for +that. + +=head1 SEE ALSO + +Here's a comparison of this module and others on CPAN. + +=over + +=item B (this module) + +=over + +=item * core dependencies only + +=item * object tracks additions and deletions of entries in the hash + +=item * optionally applies object paradigm recursively + +=item * accessors may be lvalue subroutines + +=item * accessing a non-existing element via an accessor +throws by default, but can optionally return C + +=item * can use custom package + +=item * can copy/clone existing hash. clone may be customized + +=item * can add additional methods to the hash object's class + +=item * optionally stores the constructor in a scalar + +=item * optionally provides per-attribute predicate methods +(e.g. C) + +=item * optionally provides methods to check an attribute existence or +whether its value is defined + +=item * can create immutable objects + +=back + +=item L + +As you might expect from a DCONWAY module, this does just +about everything you'd like. It has a very heavy set of dependencies. + +=item L + +=over + +=item * core dependencies only + +=item * applies object paradigm recursively + +=item * accessing a non-existing element via an accessor creates it + +=back + +=item L + +=over + +=item * moderate dependency chain (no XS?) + +=item * applies object paradigm recursively + +=item * accessing a non-existing element throws + +=back + +=item L + +=over + +=item * core dependencies only + +=item * only applies object paradigm to top level hash + +=item * can add generic accessor, mutator, and element management methods + +=item * accessing a non-existing element via an accessor creates it +(not documented, but code implies it) + +=item * C doesn't work + +=back + +=item L + +=over + +=item * core dependencies only + +=item * accessing a non-existing element via an accessor returns undef + +=item * applies object paradigm recursively + +=back + +=item L + +=over + +=item * moderate dependency chain. Requires XS, tied hashes + +=item * applies object paradigm recursively + +=item * accessing a non-existing element via an accessor creates it + +=back + +=item L + +=over + +=item * light dependency chain. Requires XS. + +=item * only applies object paradigm to top level hash + +=item * accessing a non-existing element throws, but if an existing +element is accessed, then deleted, accessor returns undef rather than +throwing + +=item * can use custom package + +=back + +=item L + +=over + +=item * uses source filters + +=item * applies object paradigm recursively + +=back + +=item L + +=over + +=item * light dependency chain + +=item * applies object paradigm recursively + +=item * accessing a non-existing element via an accessor creates it + +=back + +=item L + +=over + +=item * core dependencies only + +=item * no documentation + +=back + +=item L + +=over + +=item * core dependencies only + +=item * only applies object paradigm to top level hash + +=item * accessors may be lvalue subroutines + +=item * accessing a non-existing element via an accessor +returns C by default, but can optionally throw. Changing behavior +is done globally, so all objects are affected. + +=item * accessors must be explicitly added. + +=item * accessors may have aliases + +=item * values may be validated + +=item * invoking an accessor may trigger a callback + +=back + +=item L + +=over + +=item * minimal non-core dependencies (L) + +=item * uses L if available + +=item * only applies object paradigm to top level hash + +=item * provides separate getter and predicate methods, but only +for existing keys in hash. + +=item * hash keys are locked. + +=item * operates directly on hash. + +=back + +=item L + +=over + +=item * has a cool name + +=item * core dependencies only + +=item * locks hash by default + +=item * optionally recurses into the hash + +=item * does not track changes to hash + +=item * can destroy class + +=item * can add methods + +=item * can use custom package + +=back + +=back + +=head1 SUPPORT + +=head2 Bugs + +Please report any bugs or feature requests to bug-hash-wrap@rt.cpan.org or through the web interface at: L + +=head2 Source + +Source is available at + + https://codeberg.org/djerius/p5-Hash-Wrap + +and may be cloned from + + https://codeberg.org/djerius/p5-Hash-Wrap.git + +=head1 AUTHOR + +Diab Jerius + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2017 by Smithsonian Astrophysical Observatory. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff --git a/dev/bench/results/baseline-078e0b3d7.json b/dev/bench/results/baseline-078e0b3d7.json new file mode 100644 index 000000000..b111a71cb --- /dev/null +++ b/dev/bench/results/baseline-078e0b3d7.json @@ -0,0 +1,21 @@ +{ + "git_sha": "078e0b3d7", + "date": "2026-04-21T21:17:48Z", + "runs": 3, + "jperl": "/Users/fglock/projects/PerlOnJava3/jperl", + "perl": "perl", + "perl_version": "5.042000", + "benchmarks": { + "benchmark_anon_simple": { "unit": "s", "jperl": [7.149,7.020,7.213], "perl": [1.435,1.454,1.427] }, + "benchmark_closure": { "unit": "s", "jperl": [8.784,9.783,9.768], "perl": [8.108,7.961,7.877] }, + "benchmark_eval_string": { "unit": "s", "jperl": [14.766,14.777,14.365], "perl": [3.135,3.164,3.276] }, + "benchmark_global": { "unit": "s", "jperl": [14.608,14.579,14.720], "perl": [10.993,11.063,9.400] }, + "benchmark_lexical": { "unit": "s", "jperl": [4.059,4.010,3.989], "perl": [10.589,10.581,10.441] }, + "benchmark_method": { "unit": "s", "jperl": [2.620,2.537,2.607], "perl": [1.456,1.490,1.511] }, + "benchmark_refcount_anon": { "unit": "s", "jperl": [1.792,1.807,1.776], "perl": [0.455,0.447,0.443] }, + "benchmark_refcount_bless": { "unit": "s", "jperl": [1.293,1.305,1.311], "perl": [0.197,0.198,0.197] }, + "benchmark_regex": { "unit": "s", "jperl": [2.732,2.719,2.701], "perl": [1.974,2.005,2.006] }, + "benchmark_string": { "unit": "s", "jperl": [4.131,4.025,4.066], "perl": [6.887,6.867,6.977] }, + "life_bitpacked": { "unit": "Mcells/s", "jperl": [8.21,8.12,8.28], "perl": [20.99,20.58,20.70] } + } +} diff --git a/dev/bench/results/baseline-078e0b3d7.md b/dev/bench/results/baseline-078e0b3d7.md new file mode 100644 index 000000000..fb3f4aa34 --- /dev/null +++ b/dev/bench/results/baseline-078e0b3d7.md @@ -0,0 +1,23 @@ +# Benchmark baseline — 078e0b3d7 + +**Date:** 2026-04-21T21:17:48Z +**Runs per benchmark:** 3 +**jperl:** `/Users/fglock/projects/PerlOnJava3/jperl` +**perl:** `perl` (5.042000) + +For "time" benches lower = faster; ratio is `jperl / perl`. +For "Mcells/s" (life_bitpacked) higher = faster; ratio is `perl / jperl`. + +| Benchmark | unit | jperl | perl | ratio | parity? | +|---|---|---:|---:|---:|:---:| +| `benchmark_anon_simple` | s | 7.127 | 1.439 | **4.95×** | ❌ | +| `benchmark_closure` | s | 9.445 | 7.982 | **1.18×** | ≈ | +| `benchmark_eval_string` | s | 14.636 | 3.192 | **4.59×** | ❌ | +| `benchmark_global` | s | 14.636 | 10.485 | **1.40×** | ❌ | +| `benchmark_lexical` | s | 4.019 | 10.537 | **0.38×** | ✅ | +| `benchmark_method` | s | 2.588 | 1.486 | **1.74×** | ❌ | +| `benchmark_refcount_anon` | s | 1.792 | 0.448 | **4.00×** | ❌ | +| `benchmark_refcount_bless` | s | 1.303 | 0.197 | **6.61×** | ❌ | +| `benchmark_regex` | s | 2.717 | 1.995 | **1.36×** | ❌ | +| `benchmark_string` | s | 4.074 | 6.910 | **0.59×** | ✅ | +| `life_bitpacked` | Mcells/s | 8.203 | 20.757 | **2.53×** | ❌ | diff --git a/dev/design/classic_experiment_finding.md b/dev/design/classic_experiment_finding.md new file mode 100644 index 000000000..48a8c9af1 --- /dev/null +++ b/dev/design/classic_experiment_finding.md @@ -0,0 +1,102 @@ +# JPERL_CLASSIC experiment — cumulative-tax hypothesis confirmed + +**Branch:** `perf/perl-parity-phase1` @ 3c2ca4b6a + CLASSIC gate patches (4 files) +**Date:** 2026-04-18 +**Hypothesis:** The master→branch regression (1.67× on life_bitpacked) is NOT attributable to any single hot method. It is the cumulative cost of many small taxes added by the refcount/walker/weaken/DESTROY machinery, each individually invisible in a profile. + +## Test + +Added `JPERL_CLASSIC` env var (read once at class-init into a `static final boolean`). When set, short-circuits the branch's added machinery to near-master behavior: + +| Site | CLASSIC behavior | +|---|---| +| `MortalList.active` | `false` — every `deferDecrement*` / `scopeExitCleanup{Hash,Array}` / `mortalizeForVoidDiscard` early-returns | +| `EmitStatement.emitScopeExitNullStores` Phase 1 (`scopeExitCleanup` per scalar) | Not emitted | +| `EmitStatement.emitScopeExitNullStores` Phase 1b (cleanupHash/Array) | Not emitted | +| `EmitStatement.emitScopeExitNullStores` Phase E (`MyVarCleanupStack.unregister`) | Not emitted | +| `EmitStatement.emitScopeExitNullStores` Phase 3 (`MortalList.flush`) | Not emitted | +| `EmitVariable` `MyVarCleanupStack.register` on every `my` | Not emitted | +| `MyVarCleanupStack.register` / `unregister` | Early-return | +| `RuntimeScalar.scopeExitCleanup` | Early-return | +| `RuntimeScalar.setLargeRefCounted` | Direct field assignment, skipping refcount/WeakRefRegistry/MortalList work | + +Correctness: CLASSIC breaks DESTROY, weaken, walker semantics — only useful for measurement, not shipping. + +## Result — life_bitpacked + +`./jperl examples/life_bitpacked.pl -r none -g 500`, 5 runs each, median: + +| Mode | Runs (Mcells/s) | Median | +|---|---|---:| +| Baseline (branch machinery on) | 8.58 / 8.51 / 8.49 / 8.51 / 8.45 | **8.51** | +| `JPERL_CLASSIC=1` | 14.18 / 14.60 / 14.14 / 13.32 / 13.77 | **14.18** | +| System perl (reference) | — | 20.8 – 21.5 | +| Master @ pre-merge (reference) | — | 14.0 | + +**Speedup: 14.18 / 8.51 = 1.666×**, essentially recovering master's pre-merge number. + +## Result — benchmark_lexical (simple, no refs) + +`./jperl dev/bench/benchmark_lexical.pl`, 3 runs each: + +| Mode | Runs (iters/s) | Median | +|---|---|---:| +| Baseline | 313484 / 329270 / 314172 | **314172** | +| `JPERL_CLASSIC=1` | 357144 / 347743 / 359080 | **357144** | + +**Speedup: 1.14×** + +Even on a workload with no references and no blesses, the `my`-variable register/unregister emissions and scope-exit cleanup emissions cost ~14%. + +## Interpretation + +The hypothesis is definitively confirmed: + +1. **The master→branch perf gap is recoverable in full** (1.67× on the most ref-heavy workload) by gating the added machinery. +2. **No single site is the bottleneck.** Phase 1 (MortalList.flush) alone was worth 0.7%. Phase 2's pristine-args stub alone was worth 0%. The 1.67× comes from ~a dozen sites each contributing 2–10%. +3. **The taxes are broadly distributed across the scope-exit / variable-declaration / reference-assignment paths.** Even workloads that never exercise DESTROY/weaken pay them. + +## Implication for the plan + +The piecewise Phase 2'/3'/4' approach was the wrong framing. The right structural fix: + +**Make the machinery per-object-opt-in, not always-on.** Perl 5's design: `SvREFCNT_inc` is free for most SVs because the type tag gates the work. Only objects that need refcount tracking pay the cost. + +Concrete proposal (call it Phase R — "refcount by need"): + +1. Add a single `needsCleanup` bit to `RuntimeBase`, default `false`. +2. Set it to `true` only when: + - The object is blessed into a class that has `DESTROY`, OR + - The object is targeted by `Scalar::Util::weaken`, OR + - The object is captured by a CODE ref whose refCount we need to track for cycle break. +3. Every CURRENT-BRANCH fast-path site becomes `if (!needsCleanup) return ;`: + - `setLargeRefCounted` → direct assignment if neither side needs cleanup + - `scopeExitCleanup` → no-op if scalar's value doesn't need cleanup + - `MyVarCleanupStack.register` → skip if the var's referent doesn't need cleanup + - `MortalList.deferDecrement*` → skip if referent doesn't need cleanup + - `scopeExitCleanupHash/Array` → skip if container has no needsCleanup descendants + +With per-object gating, life_bitpacked (zero blessed objects, zero weaken) pays zero tax and runs at ~14 Mc/s. DBIx::Class / txn_scope_guard / destroy_eval_die (objects that DO need cleanup) still work correctly. + +This is a **significant refactor** — every site listed above needs a cheap gate check. But: + +- The CLASSIC experiment has already implemented those gate checks (just globally rather than per-object). Most of the code is the early-return condition. +- The JIT will fold the `needsCleanup == false` check away to almost nothing once it sees a type-stable call site. +- Correctness is easier to reason about than the current "always-tracked" design, because the gate explicitly matches the semantic condition that requires tracking. + +## Files touched in this experiment + +``` +src/main/java/org/perlonjava/runtime/runtimetypes/MortalList.java (+CLASSIC flag, active init) +src/main/java/org/perlonjava/runtime/runtimetypes/MyVarCleanupStack.java (register/unregister early-return) +src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java (setLargeRefCounted + scopeExitCleanup early-return) +src/main/java/org/perlonjava/backend/jvm/EmitStatement.java (4 emission sites gated) +src/main/java/org/perlonjava/backend/jvm/EmitVariable.java (register emission gated) +``` + +## Next step + +Either: +1. **Commit the CLASSIC gate** as a measurement tool on `perf/perl-parity-phase1` (doesn't ship to users; helps future perf work A/B the full-feature cost). +2. **Move directly to Phase R** (per-object `needsCleanup` bit) based on this evidence, using the CLASSIC gate sites as the map of what needs per-object gating. +3. **Revert** the CLASSIC gate and keep this document as the finding. diff --git a/dev/design/hash_wrap_triage_plan.md b/dev/design/hash_wrap_triage_plan.md new file mode 100644 index 000000000..8e834048d --- /dev/null +++ b/dev/design/hash_wrap_triage_plan.md @@ -0,0 +1,211 @@ +# Hash::Wrap `t/as_return.t` — GC-thrash / infinite-loop triage plan + +**Status**: Investigation in progress. PR #536 blocked until this class of failure is resolved. + +## Scope + +Hash::Wrap's `t/as_return.t` (45 lines) and DBIx::Class exhibit the same class of failure: extremely high CPU + memory, no apparent forward progress, wallclock >> real-Perl expectation. User-visible symptom is "stuck" or "timeout". + +This plan picks Hash::Wrap as the minimal reproducer (tight CPAN test, independent of DBIC fixtures). + +## Observations (2026-04-23) + +### Reproducer captured +``` +/Users/fglock/projects/PerlOnJava3/dev/bench/hash_wrap_repro/ + t/as_return.t # 45 lines, copied from Hash-Wrap-1.09 + lib/Hash/Wrap.pm # upstream pure-Perl +``` + +Invoke: +```bash +cd dev/bench/hash_wrap_repro +timeout 30 ../../../jperl -Ilib t/as_return.t +``` + +Baseline: at 15 s the main thread has used 13 s CPU (~89 % of one core — **not** GC-thrash on my machine). Reproduces at 11+ cores on the user's original machine — same code, different GC amplification due to machine/load. Correctness-level reproducer is the same. + +### First bug localised: `B::NULL::next` self-loop + +`jstack` on the stuck process shows the inner loop is: + +``` +java.util.concurrent.ConcurrentHashMap.get(ConcurrentHashMap.java:952) +NameNormalizer.normalizeVariableName(NameNormalizer.java:144) +InheritanceResolver.findMethodInHierarchy(InheritanceResolver.java:310) +Universal.can(Universal.java:175) +RuntimeCode.callCached(RuntimeCode.java:1780) +anon1485.apply(Test2/Util/Sub.pm:577) <-- $op->can('line') / $op->can('next') +``` + +Tracing upward: `Test2::Util::Sub::sub_info` walks the OP tree: + +```perl +my $op = $cobj->START; +while ($op) { + push @all_lines => $op->line if $op->can('line'); + last unless $op->can('next'); # <- termination check + $op = $op->next; +} +``` + +PerlOnJava's `src/main/perl/lib/B.pm` has: + +```perl +package B::NULL { + our @ISA = ('B::OP'); + sub new { bless {}, shift } + sub next { + # NULL is terminal -- return self to prevent infinite loops + return $_[0]; + } +} +``` + +**The comment is inverted.** Returning `$_[0]` keeps `$op` as the same B::NULL forever: + +* `$op->can('line')` → true (inherited from B::OP) +* `$op->can('next')` → true (inherited from B::OP) +* `$op = $op->next` → same B::NULL +* Loop never exits, `@all_lines` grows unboundedly → GC pressure once array outgrows young gen → user sees the 13 GC threads + 25 % useful CPU. + +Hash::Wrap trips this because Test2's structural compare (`meta { prop ... object { call ... } }`) calls `sub_info` on every comparison callback — one infinite loop per check. + +DBIx::Class likely trips the same path (its test suite also uses Test2 deep compare, and DBIC itself uses Sub::Defer / B introspection heavily). + +### Fix for the immediate infinite loop + +Replace `B::NULL::next` with a sentinel that actually terminates the common walker patterns: + +```perl +package B::NULL { + our @ISA = ('B::OP'); + sub new { bless {}, shift } + + # Every method call on B::NULL returns undef (matches real Perl XS). + # Crucially, `$null->next` returning undef terminates while($op) loops. + sub next { return; } + sub line { return; } + # `can('next')` still returns true via B::OP inheritance; the + # caller's `$op = $op->next` sets $op to undef and while($op) exits. +} +``` + +Before landing: audit other B.pm sentinel methods (`sibling`, `targ`, `sibparent`, `first`, `last`, etc.) for the same mistake. + +## Why this is sufficient for Hash::Wrap but not the full class of problem + +The B::NULL fix makes `sub_info` terminate on first invocation. Once it's terminating: + +1. The test proceeds into the actual structural compare. +2. Every `is($obj, meta { ... })` still allocates deep `Test2::Compare::Delta` trees. +3. Each Delta node is a blessed hashref → traverses `RuntimeScalar.setLargeRefCounted`, `MortalList.deferDecrement*`, walker arming etc. +4. This is the *real* distributed-tax problem we already confirmed in Phase R. + +With just the B::NULL fix, Hash::Wrap completes but still runs an order of magnitude slower than real Perl. That may be acceptable for the test-to-pass gate; it is not acceptable for "perf parity". The full plan below addresses both. + +## Plan + +Four phases. Each phase has an explicit measurement gate before moving to the next. + +### Phase 0 — Unblock the test (same-day) + +1. **Fix `B::NULL::next`** and audit other B.pm sentinels (see above). +2. Run Hash::Wrap `t/as_return.t` and `DBIx-Class-0.082844-68/t/storage/base.t` to completion. Record wallclock, CPU ratio, allocation rate via JFR. +3. Acceptance: both complete in finite time, produce TAP with actual pass/fail rather than timeouts. (Pass/fail counts themselves can still regress — that's Phase 1-3 territory.) +4. Commit the fix on `perf/phase-r-needs-cleanup`. + +**Risk**: very low. Change is localised to the B.pm shim. Regression surface: code that relied on `$null->next == $null` for some iteration invariant. No known such code. + +### Phase 1 — Establish allocation baseline + +Goal: turn "slow under GC" from hand-wave into numbers. + +1. JFR run on Hash::Wrap `t/as_return.t`: + ``` + JPERL_OPTS="-XX:+FlightRecorder -XX:StartFlightRecording=\ + filename=dev/bench/results/jfr/hash_wrap.jfr,\ + settings=profile,duration=60s" \ + ./jperl -Ilib t/as_return.t + ``` + Capture `jdk.ObjectAllocationSample` + `jdk.ObjectAllocationInNewTLAB` + `jdk.GCHeapSummary`. + +2. Same run with `JPERL_CLASSIC=1` for the upper bound. + +3. Top allocators (top 10 by bytes): expected candidates are `RuntimeScalar`, `RuntimeHash`, `RuntimeArray`, `MortalList$Entry`, Test2 Delta/Check/Meta classes (pure Perl packages compiled to our anon classes). Record exact numbers in `dev/design/hash_wrap_alloc_profile.md`. + +4. GC metric deltas: young-gen pause %, old-gen promotions/sec, total GC time as % of wallclock. If CLASSIC drops GC time from e.g. 60 % to 10 %, we know our machinery is the allocation driver; if GC stays high under CLASSIC, the allocation source is non-PerlOnJava (upstream Test2 / Hash::Wrap pattern itself). + +**Acceptance gate**: an allocation profile committed under `dev/bench/results/` that clearly identifies the top 3 allocation sites contributing >60 % of bytes. + +### Phase 2 — Reduce allocation at the top-3 sites + +This is concrete engineering work whose scope depends on Phase 1's findings. Candidate targets based on prior profiling work: + +| Candidate | Already known from | Expected impact | +|---|---|---| +| `RuntimeList.add` → `ArrayList.grow` from initial capacity 10 | `life_bitpacked_jfr_profile.md` | 5–14 % on life_bitpacked | +| `MortalList.pending` growth (same `ArrayList.grow` pattern) | `classic_experiment_finding.md` (implicit) | varies with callsite density | +| Per-`my` `MyVarCleanupStack.register` list add | Phase R measured | already captured in `1.49×` | +| Intermediate `RuntimeScalar(integer)` boxing in comparison callbacks | `life_bitpacked_jfr_profile.md` (via `RuntimeScalarCache.getScalarInt`) | unknown for Test2 workload | + +For each chosen target: + +1. Minimal hack that short-circuits the allocation (even if broken) — upper-bound measurement. +2. If upper bound ≥ 5 % wallclock improvement, implement cleanly. +3. If < 5 %, document and move on (Phase 1 Lessons Learned rule). + +**Acceptance gate**: Hash::Wrap wallclock within 5 × real Perl and no test failures beyond pre-existing. + +### Phase 3 — Conditional machinery (the real Phase R) + +`JPERL_CLASSIC=1` proved that removing the machinery globally restores master-era performance. Making the machinery *conditional on need* gives us that speedup without sacrificing DESTROY/weaken correctness. + +Proposal restated here for a fresh reader: + +* One `public boolean needsCleanup` on `RuntimeBase`, default `false`. +* Set to `true` on: `bless` into a class with `DESTROY`, `Scalar::Util::weaken`, closure-capture of a blessed referent (later — first cut only covers the first two). +* Every CLASSIC-gated site becomes `if (!base.needsCleanup) return ;`: + - `RuntimeScalar.setLargeRefCounted` + - `RuntimeScalar.scopeExitCleanup` + - `MortalList.deferDecrementIfTracked` etc. + - `MortalList.scopeExitCleanupHash` / `scopeExitCleanupArray` + - `EmitVariable`: MyVarCleanupStack.register emission (still compile-time gated via `CleanupNeededVisitor`, that stays) + +Test2's `Compare::Delta` nodes are blessed but *don't* have DESTROY — so they land on the fast path. Hash::Wrap's `A1`/`A2` wrappers are blessed but don't have DESTROY — fast path. DBIC's `ResultSet`/`ResultSource` *do* have DESTROY (via `next::can` dispatch under the hood) — slow path, correct. + +**Scope**: ~30 gate sites mapped by the CLASSIC patch. Each call site gets a one-line guard. Core invariant change is on `RuntimeBase` — one new bit. + +**Acceptance gate** (the PR merge gate): + +| Measurement | Gate | +|---|---| +| Hash::Wrap `t/as_return.t` | passes in < 2 × real-Perl wallclock | +| DBIC full suite `./jcpan -t DBIx::Class` | zero timeouts; same pass count as commit `99509c6a0` (13 804 / 13 804) | +| `make test-bundled-modules` | still 176 / 176 | +| `make` unit tests | no new regressions beyond pre-existing `destroy_eval_die.t#4` | +| `life_bitpacked` | Phase R speedup preserved (≥ 1.3 × vs pre-merge baseline) | +| `destroy_eval_die.t` | same pass count (9 / 10 on current branch) | +| DBIx::Class `t/storage/txn_scope_guard.t` | 18 / 18 | + +**Risk**: Medium. Per-object bit is simple in principle; the hard part is ensuring every *entry* into the tracked-object set correctly flips the bit. Fortunately the CLASSIC patch already identifies the gates, so we have a map. + +### Phase 4 — Validation & documentation + +1. Run Phase 3 acceptance gate on a clean machine. Document wallclock/CPU/GC numbers for each benchmark in `dev/bench/results/`. +2. Update `dev/design/perl_parity_plan.md` to reflect Phase R → Phase R+(refcount-by-need) progression. +3. Merge PR #536 once all gates are green. +4. File follow-up tickets for remaining ≤ 5 % per-site optimisations (none are in scope for the merge). + +## Sequence / dependencies + +``` +Phase 0 (immediate fix) ──┐ + ├─▶ Phase 1 (profile) ──▶ Phase 2 (alloc reductions) ──▶ Phase 3 (conditional machinery) ──▶ Phase 4 (validate + merge) +``` + +Phase 0 is the sole prerequisite to unblock `./jcpan -t DBIx::Class` from getting stuck in the infinite loop. Phases 2 and 3 are independent of each other — if Phase 2 alone gets us to the merge gate, Phase 3 can slip to a follow-up PR. + +## Immediate next step + +Apply the B::NULL fix, verify Hash::Wrap completes (doesn't need to *pass*, just complete), commit, rerun `./jcpan -t DBIx::Class` to see whether any tests that were previously timing out now progress to a proper result. diff --git a/dev/design/life_bitpacked_jfr_profile.md b/dev/design/life_bitpacked_jfr_profile.md new file mode 100644 index 000000000..11a9935d4 --- /dev/null +++ b/dev/design/life_bitpacked_jfr_profile.md @@ -0,0 +1,75 @@ +# life_bitpacked JFR profile — what's actually hot + +**Captured:** 2026-04-18 on `perf/perl-parity-phase1` @ 3c2ca4b6a +**Workload:** `./jperl examples/life_bitpacked.pl -r none -g 20000` (17.2s wall, 14.85 Mcells/s) +**Profile:** 60s JFR with `settings=profile`, ~477 ExecutionSample events, file `dev/bench/results/jfr/life_bp_long.jfr` + +## Why this profile exists + +Phase 1 of `perl_parity_plan.md` was rejected on its upper-bound measurement (~0.7% vs 2% gate). The conclusion was that our a-priori cost model (which assumed INVOKESTATIC dispatch of `MortalList.flush` was hot) was wrong — HotSpot had already inlined the empty-case fast path. Before committing to Phase 2's large sub-call-context refactor, we profiled first. + +Phase 2 pristine-args stub experiment also showed zero improvement (same median within noise). So **the 7 ThreadLocal sub-call stacks are NOT the bottleneck either** — the JIT handles them well. + +## Hot methods (top-of-stack self-time) + +| Method | Samples | % | +|---|---:|---:| +| `RuntimeScalar.getDefinedBoolean()` | 74 | **~15%** | +| `anon230.apply` (user sub body) | 70 | 14% | +| `java.util.Arrays.copyOf` (ArrayList growth) | 70 | 14% | +| `RuntimeScalarType.blessedId(RuntimeScalar)` | 55 | 11% | +| `RuntimeScalar.set(RuntimeScalar)` | 38 | 8% | +| `RuntimeList.setFromList(RuntimeList)` | 27 | 5% | +| `RuntimeScalarCache.getScalarInt(int)` | 20 | 4% | +| `RuntimeControlFlowRegistry.checkLoopAndGetAction(String)` | 12 | 2% | +| `RuntimeScalar.scopeExitCleanup` | 6 | 1% | +| `MortalList.flush` | 5 | 1% | + +The user's bitwise ops (`bitwiseAnd`/`Xor`/`Or`/`shiftLeft`/`shiftRight`) together amount to ~**30 samples = 6%** — tiny compared to the dispatch/allocation overhead. + +## Key insights + +### 1. `getDefinedBoolean()` is the #1 self-time hit + +15% of CPU is spent deciding whether a scalar is defined. This is hit heavily by things like `if ($x)` boolean truth tests, `defined($x)` guards, and `||` / `//` expressions. Any simplification (e.g., marking cached common scalars as "always defined" and short-circuiting) would pay out immediately. + +### 2. ArrayList growth is the #2 self-time hit + +14% of CPU is spent in `Arrays.copyOf` for ArrayList growth. Stack traces show the callers are: +- `RuntimeList.add(RuntimeBase)` — return value list building in `RuntimeCode.apply` +- `RuntimeList.add(RuntimeScalar)` — user sub assembling its return list + +**This means every sub call allocates a small ArrayList that immediately grows.** Presizing or pooling could save ~14%. + +### 3. `blessedId` is 11% + +`RuntimeScalarType.blessedId(RuntimeScalar)` is hit 55 times. This is the per-method-call class dispatch path. On life_bitpacked there are no blessed objects in the hot path, so this is checking whether a scalar is blessed on every op that might use overloading. A fast-path for "not blessed" could matter. + +### 4. `MortalList.flush` is irrelevant (1%) + +Confirms Phase 1's rejection — `flush` is barely on the profile. + +### 5. ThreadLocal overhead is invisible + +No `ThreadLocal.get()` or `ArrayDeque.push/pop` in the hot list. JIT already inlines these. **Phase 2 of the original plan (consolidate 7 TL stacks) would not help life_bitpacked.** + +## Revised candidate phases + +| Phase | Hypothesis | Upper bound estimate | First test | +|---|---|---|---| +| 2' | Presize `RuntimeList` backing ArrayList to avoid grow-from-10 | **5-10%** (14% ceiling) | Change `RuntimeList`'s initial `new ArrayList<>()` to `new ArrayList<>(8)` or similar; A/B | +| 3' | Fast-path `getDefinedBoolean` for `RuntimeScalarReadOnly` / integer types | **3-5%** (15% ceiling) | Add explicit override on cached scalar types; A/B | +| 4' | Fast-path `blessedId` for non-blessed scalars | **2-4%** (11% ceiling) | Inline `blessed == null` check; A/B | + +Any single one of these has a higher upper bound than Phase 1 or original Phase 2 ever could. They should each be derisked with a minimal patch + measurement before committing to implementation, same gating as Phase 1. + +## What to do next + +1. **Retire original Phase 2** (TL consolidation) in `perl_parity_plan.md`. +2. **Adopt Phase 2'** (RuntimeList presize) as the new Phase 2 candidate. +3. **Measurement-first rule still applies** — start every phase with a minimal hack + 5-run median; if it doesn't move the needle by 2%+, reject. + +## Files + +- `dev/bench/results/jfr/life_bp_long.jfr` — raw JFR, reproducible with `JPERL_OPTS="-XX:+FlightRecorder -XX:StartFlightRecording=duration=60s,filename=...,settings=profile" ./jperl examples/life_bitpacked.pl -r none -g 20000` +- `/tmp/jfr_exec.txt` — textual dump of `jdk.ExecutionSample` events (not committed; regenerate with `jfr print --events jdk.ExecutionSample ...jfr`) diff --git a/dev/design/perl_parity_plan.md b/dev/design/perl_parity_plan.md new file mode 100644 index 000000000..954aee71e --- /dev/null +++ b/dev/design/perl_parity_plan.md @@ -0,0 +1,438 @@ +# Perl-parity plan — recovering the master-to-perl gap + +**Status:** Proposal + Phase 1 execution +**Origin:** `dev/design/perl5_internals_comparison.md` identified 5 structural hot-path differences between PerlOnJava (master and below) and system perl. This doc turns that analysis into concrete phases with per-phase measurement gates. + +## Scope + +Close the **master-to-perl** gap — the ~1.52× speed ratio master has vs system perl on `life_bitpacked` (and the similar ratio on other sub-call-heavy benches). This is about PerlOnJava's fundamental per-sub-call overhead; it is **separate** from the walker-hardening / refcount-alignment overhead vs master (that's tracked in `life_bitpacked_regression_analysis.md` and §0 of `next_steps.md`). + +## Overall measurement protocol + +Every phase MUST produce evidence from the **same measurement harness** so phases can be compared: + +1. **Correctness gates (hard):** + - `make` — all unit tests pass except pre-existing `destroy_eval_die.t#4` + - `src/test/resources/unit/refcount/destroy_eval_die.t` — same pass count as baseline + - DBIx::Class `t/storage/txn_scope_guard.t` — 18/18 + - `src/test/resources/unit/tie_scalar.t` — 12/12 + - `src/test/resources/unit/refcount/*.t` — same pass count as baseline + - **Any correctness regression blocks the phase.** + +2. **Perf gate (per phase):** + - A/B within a single process: 5 runs with the feature enabled, 5 runs with a `JPERL_NO_PHASE_N=1` env var disabling it (each phase defines its disable switch). + - `life_bitpacked` with `-r none -g 500` — median Mcells/s compared. + - Full `COMPARE=perl BENCH_RUNS=3 dev/bench/run_baseline.sh` snapshot, `baseline-.md` captured in `dev/bench/results/`. + - **Required:** median life_bitpacked improvement ≥ 2% AND no benchmark regresses > 3% compared to pre-phase baseline. + - **If neither condition holds, revert.** Phase stays as "attempted, didn't pan out" in this doc. + +3. **Comparison anchors (always measured):** + - System perl + - `feature/refcount-perf-combined` HEAD before this phase started + - Current tip (after this phase) + - Master (once per full measurement pass, as the long-term ceiling reference) + +## Phase summary & dependencies + +Phases are numbered by the order in which they should ship: + +| # | Change | Expected gain | Effort | Depends on | +|---|---|---:|---|---| +| 1 | ~~FREETMPS-style compare gating `MortalList.flush`~~ — **REJECTED** 2026-04-18, upper-bound ~0.7% | n/a | low | — | +| 2 | ~~Consolidate 7 TL stacks → one `PerlContext` struct~~ — **REJECTED** 2026-04-18, pristine-stub upper bound 0% | n/a | medium | — | +| 2' | ~~Presize `RuntimeList` backing ArrayList~~ — **SUPERSEDED** by Phase R (see below) | n/a | low-medium | — | +| 3' | ~~Fast-path `getDefinedBoolean`~~ — **SUPERSEDED** by Phase R | n/a | low | — | +| 4' | ~~Fast-path `blessedId`~~ — **SUPERSEDED** by Phase R | n/a | low | — | +| **R** | **Per-object `needsCleanup` gate across all branch machinery** (the real fix) | **~67% (life_bitpacked), 14% (lexical-only)** | high | — | +| 5 | Final cleanup & doc sync | — | low | R | + +**2026-04-18 update:** The `JPERL_CLASSIC=1` experiment (see `dev/design/classic_experiment_finding.md`) confirmed the cumulative-tax hypothesis. Disabling the branch's added machinery globally recovers 1.67× on life_bitpacked (essentially reaching pre-merge master) and 1.14× on a lexical-only bench. The master→branch gap is not one hotspot; it is ~a dozen small taxes that cannot be fixed piecewise. + +The correct structural fix (Phase R) is: add a single `needsCleanup` bit to `RuntimeBase`, set only for objects that actually need DESTROY/weaken/walker tracking, and gate every added fast-path site on that bit. The CLASSIC experiment has already mapped out exactly which sites need the gate. + +Phase 2'/3'/4' (the hotspot-driven candidates from `life_bitpacked_jfr_profile.md`) are superseded — those hotspots (`getDefinedBoolean`, `ArrayList.grow`, `blessedId`) are amplified by the same machinery and will get quieter automatically once Phase R is in. + +Why this order: + +- **Phase 1 is standalone** — no dependencies on the other phases, minimal risk, quick measurement. Serves as a sanity check that our measurement harness is sensitive enough to detect the expected-magnitude gains. +- **Phase 2 is the keystone.** Several of the later phases become cheaper once all the caller-context state lives in one struct behind one ThreadLocal (inline refcount helpers need this; array-backed stack needs this). +- **Phase 3 reuses Phase 2's struct** — the tiny inlinable refcount helpers live in or adjacent to `PerlContext`. +- **Phase 4 is the big structural lift** — do last when the surrounding state is simplified. +- **Phase 5** is the documentation sync + any cleanup of tombstone branches / temporary opt-out env vars. + +Abort early if any phase fails its perf gate. We don't pile up speculative changes. + +### Lessons from Phase 1 (apply to Phases 2-4) + +Phase 1 was rejected on its upper-bound measurement (~0.7% vs 2% gate) — the cost model ("INVOKESTATIC dispatch is hot") was wrong because HotSpot inlined the empty-case fast path inside `flush()`. Conclusion: + +**Every remaining phase MUST derisk with a profiler sample BEFORE implementation.** The per-phase workflow is now: + +1. **Upper-bound experiment first.** Patch the minimum hack that would represent the phase's theoretical best case (even if broken/unsafe) and measure life_bitpacked + bench suite. If the upper bound is < 1.5× the required gate, reject the phase without implementation. +2. **If upper-bound passes:** implement cleanly, run correctness gates, measure again. +3. **If upper-bound fails:** document in this doc and move to next phase. + +This saves ~days per rejected phase vs. a full implementation + revert cycle. + +--- + +## Phase 1 — FREETMPS-style compare gating the flush + +**Status: INVESTIGATED — REJECTED (2026-04-18)** + +### Result + +Upper-bound experiment on `perf/perl-parity-phase1` @ 3c2ca4b6a: patched `EmitStatement.java` to emit **zero** `INVOKESTATIC MortalList.flush` calls at scope exit (gated by `JPERL_DISABLE_FLUSH_EMIT=1`). This simulates the absolute best case the Phase 1 guard could achieve — a theoretical zero-cost flush skip. + +`life_bitpacked -r none -g 500`, 5 runs each, median Mcells/s: + +| Variant | Runs | Median | +|---|---|---| +| Baseline (flush emitted) | 8.93 / 8.77 / 8.80 / 8.81 / 8.78 | **8.80** | +| Upper bound (no flush emitted) | 8.95 / 8.86 / 8.90 / 8.86 / 8.76 | **8.86** | + +Improvement: ~0.7%. Well below the ≥2% Phase 1 gate. Within noise on a single bench. + +### Why this was wrong + +`MortalList.flush()`'s first instruction is `if (!active || pending.isEmpty() || flushing) return;`. HotSpot C2 inlines static call targets ≤ 35 bytes after ~10k invocations, so the "empty case" path effectively becomes three GETSTATIC-IFEQ-like checks. There is no meaningful INVOKESTATIC dispatch cost to cut once inlining takes over. + +The real cost driver on life_bitpacked is **somewhere else** — most likely the sub-call context (Phase 2) and/or refcount ops (Phase 3). + +### Decision + +Phase 1 is closed out. No code change shipped. Moving to Phase 2. + +--- + +### Goal (original, kept for archival) + +Make the common "no mortals to free" case at scope exit effectively free. + +### Background + +System perl: +```c +#define FREETMPS if (PL_tmps_ix > PL_tmps_floor) free_tmps() +``` + +One compare, zero overhead when the tmp stack is empty. + +PerlOnJava today emits at scope exit: +``` +INVOKESTATIC MortalList.flush ()V +``` + +…unconditionally. `MortalList.flush()` itself checks for an empty stack as its first action, but the INVOKESTATIC dispatch cost (~5 ns) is paid regardless. Over millions of sub calls in a tight loop, measurable. + +### Design + +Add two `public static` int fields (or thin accessors) exposing `MortalList.tmpsIx` and `MortalList.tmpsFloor`. Emit: + +``` +GETSTATIC MortalList.tmpsIx I +GETSTATIC MortalList.tmpsFloor I +IF_ICMPLE skip_flush +INVOKESTATIC MortalList.flush ()V +skip_flush: +``` + +~5 bytes of bytecode replaces 3 for the call, but saves the call dispatch when the stack is empty. + +`MortalList.flush()` stays unchanged — we're bypassing its INVOKESTATIC in the common case, not changing its semantics. + +### Risks + +- Reading the int fields is a `GETSTATIC`, which is very cheap. No correctness concern. +- If the indices are not public yet, we either expose them or add cheap static accessor helpers that the JIT can inline (a `public static int tmpsAboveFloor() { return tmpsIx - tmpsFloor; }` would be cleanest). +- Concurrent modification: `MortalList` is a ThreadLocal, so the fields are per-thread. No visibility issues. + +### Opt-out + +Env var `JPERL_NO_PHASE1=1` set at `EmitterMethodCreator` load-time forces the unconditional `INVOKESTATIC MortalList.flush` emission. Lets us A/B the exact same binary. + +### Correctness gates + +- Full `make` run green (except destroy_eval_die.t#4 pre-existing) +- destroy_eval_die.t pass count unchanged +- DBIC txn_scope_guard.t 18/18 +- tie_scalar.t 12/12 +- Quick DESTROY smoke: `./jperl -e 'package T; sub new { bless {}, shift } sub DESTROY { $::d++ } package main; { my $x = T->new; } print $::d'` should print `1` + +### Measurement gate + +- life_bitpacked: 5 runs each branch, median improvement ≥ 2% +- `refcount_bless` / `refcount_anon`: no regression > 3% +- Full bench suite snapshot committed under `dev/bench/results/` + +### Abort condition + +If the median is < 2% improvement OR any correctness gate fails OR any non-life_bitpacked benchmark regresses > 3% compared to the pre-phase baseline, **revert the phase**. Document the finding in this doc under "Phase 1 results". + +--- + +## Phase 2 — Consolidate ThreadLocal stacks into `PerlContext` + +### Goal + +Reduce per-sub-call ThreadLocal traffic from 7 separate `TL.get()` lookups to 1. Also eliminate the `HashMap` copy in `WarningBitsRegistry.pushCallerHintHash()` for the common empty-hint-hash case. + +### Background + +System perl pushes ONE `PERL_CONTEXT` struct per sub call. All caller metadata (CV, retop, savearray, old pad, warning bits, hints, etc.) is in that one struct. `cxstack` is a flat array of these structs; pushing is `cxstack[cxstack_ix++] = ...`. + +PerlOnJava today has 7 separate ThreadLocal stacks: + +1. `RuntimeCode.argsStack` +2. `RuntimeCode.pristineArgsStack` (our branch) +3. `RuntimeCode.hasArgsStack` +4. `WarningBitsRegistry.currentBitsStack` +5. `WarningBitsRegistry.callerBitsStack` +6. `WarningBitsRegistry.callerHintsStack` +7. `HintHashRegistry.callerSnapshotIdStack` + +Each push is: `ThreadLocal.get()` + `Deque.push(value)`. Seven times per sub call. + +JFR confirms the cost: 4 extra `RuntimeList` + 4 extra `ArrayList` allocations per life_bitpacked generation vs master (ArrayDeque's internal bookkeeping spills into these allocations). + +### Design + +Introduce `class PerlContext` in `org.perlonjava.runtime.runtimetypes`. Fields: + +```java +public final class PerlContext { + // args stacks + public RuntimeArray[] argsStack; int argsIx; + public List[] pristineArgsStack; int pristineIx; + public boolean[] hasArgsStack; int hasArgsIx; + + // caller context (one array of frame records) + public CallerFrame[] callerFrames; int callerFramesIx; + + // mortal / savestack state + public int tmpsIx, tmpsFloor; + // ... +} + +public static final ThreadLocal CTX = + ThreadLocal.withInitial(PerlContext::new); +``` + +One `TL.get()` at sub entry, one at sub exit. All the pushes are field + array operations. + +`CallerFrame` combines bits, hints, hintHashId into a single record. + +Existing APIs (`getCallerBitsAtFrame`, `getCallerHintsAtFrame`, `getHasArgsAt`, `getPristineArgsAt`) read from the consolidated frames array. + +The separate Registries stay as pure facades (their existing static methods delegate to the consolidated struct) so external callers don't break. + +### Risks + +- Touches many read sites. Needs thorough testing. +- Multi-phase migration: first add `PerlContext` alongside the existing stacks, make the registries read from both (prefer PerlContext), then remove the old stacks. +- Interpreter backend (`BytecodeInterpreter`) may have direct references to some of these stacks; must be updated. + +### Opt-out + +`JPERL_NO_PHASE2=1` at class-load time uses the old stacks. Adds a runtime branch on the flag in each push/pop, so we can A/B. + +### Correctness gates + +Same as Phase 1, plus: +- Run full DBIC test suite (`jcpan -t DBIx::Class`) — expect same pass count as pre-phase +- Run TT, Moo +- `make test-bundled-modules` + +### Measurement gate + +- life_bitpacked: 5 runs each, median improvement ≥ 4% over Phase 1 baseline +- No bench regresses > 3% +- Allocation profile: `RuntimeList` / `ArrayList` allocation rate cut by ≥ 50% + +### Abort condition + +If gains are < 4% OR allocation rate doesn't drop, the consolidation is not paying for its complexity — revert to just keeping the `PerlContext` as a stub for Phase 3's benefit. + +--- + +## Phase 3 — Inline refcount helpers + +### Goal + +Make `refcnt_inc` / `refcnt_dec_or_free` tiny static methods (< 20 bytes) that the JIT always inlines, moving `ScalarRefRegistry.registerRef` / `MortalList.deferDecrement` to the cold path. + +### Background + +Perl's SvREFCNT_inc is `++sv->refcnt` (1 store). SvREFCNT_dec is 4 instructions in the hot path with `sv_free2` on the cold path only. + +PerlOnJava's equivalent goes through `setLarge()` / `scopeExitCleanup` — methods that are 100-500 bytes and fail to inline under `-XX:+PrintInlining`. + +### Design + +Add to `RuntimeBase` (or a new `Refcnt` class): + +```java +public static void refcntInc(RuntimeBase base) { + if (base != null && base.refCount >= 0) { + base.refCount++; + } +} + +public static void refcntDecOrFree(RuntimeBase base) { + if (base != null && base.refCount > 1) { + base.refCount--; + } else if (base != null) { + base.refCount--; + refcntFreeColdPath(base); // separate method, out of line + } +} +``` + +Each helper body is < 20 bytes. JIT will inline eagerly at hot call sites. + +Migrate call sites in the emitter: instead of emitting `INVOKESTATIC scopeExitCleanup`, emit `INVOKESTATIC refcntDecOrFree`. `scopeExitCleanup` stays for complex cases (IO owner, capture count). + +### Risks + +- Correctness: any case where the "hot path" needs to do more than decrement (IO owner unregister, weakref clearing, MortalList pending entry) must still route through the cold path. +- The `ScalarRefRegistry.registerRef` we currently do at assignment time may conflict — need to understand when it's truly needed. + +### Opt-out + +`JPERL_NO_PHASE3=1` — emit the old INVOKESTATICs. + +### Correctness gates + +Same as Phase 2, plus: +- Specific DESTROY-correctness tests: `unit/refcount/*.t` all pass +- DBIC's 52leaks test still passes + +### Measurement gate + +- `benchmark_refcount_bless` / `benchmark_refcount_anon`: median improvement ≥ 5% over Phase 2 baseline +- life_bitpacked: no regression (this phase isn't expected to help pure numeric loops) +- JIT inlining trace: `refcntDecOrFree (N bytes)` shows `inline (hot)` at hot call sites + +### Abort condition + +Correctness: any regression is an immediate revert (refcount bugs are silent and bad). +Perf: if benchmark_refcount_* doesn't improve ≥ 3%, the win isn't worth the complexity. + +--- + +## Phase 4 — Array-backed value stack + +### Goal + +Match Perl's `PL_stack_sp` / `PL_stack_base` model. Per-thread `Object[] stack` + `int sp` replacing the current `ArrayDeque` for args and similar per-call value passing. + +### Background + +Perl's value stack is a flat `SV**` array. `PUSHs(sv)` is `*PL_stack_sp++ = sv`. `PL_stack_sp` is kept in a register across pp function bodies. Push/pop is ~1 cycle. + +PerlOnJava uses `ArrayDeque`, which: +- Boxes primitives (e.g. `hasArgsStack` push `Boolean.FALSE`) +- Requires virtual dispatch on `push`/`pop` +- Does internal resizing + +### Design + +In `PerlContext` (from Phase 2), replace `ArrayDeque` fields with: + +```java +public RuntimeArray[] argsStack = new RuntimeArray[256]; +public int argsIx; + +public void pushArgs(RuntimeArray a) { + if (argsIx == argsStack.length) argsStack = grow(argsStack); + argsStack[argsIx++] = a; +} +``` + +Same for `pristineArgsStack`, `callerFrames`. + +Critical: the grow check must be in a hot-inlineable function. A `UNLIKELY(argsIx == argsStack.length)` branch is what Perl does via the `markstack_grow()` out-of-line call. + +### Risks + +- `pushArgs`/`popArgs` have to handle both JVM and interpreter backends consistently. +- `caller()` iterates the stack by index; indexed access is actually easier than before. +- **Biggest risk:** subtle ordering bugs when arrays resize. Write a thorough stress test with deeply nested subs. + +### Opt-out + +Phase 2's `PerlContext` supports both the ArrayDeque and array-backed versions behind a feature flag. Env var `JPERL_NO_PHASE4=1` selects ArrayDeque. + +### Correctness gates + +Same as Phase 3, plus: +- Stress test: 1000+ deeply nested sub calls, verify no corruption. +- Run with `-Xss128k` to ensure the stack growth logic works correctly. + +### Measurement gate + +- life_bitpacked: median improvement ≥ 5% over Phase 3 baseline +- benchmark_method: median improvement ≥ 5% +- Bench ratios overall trending toward 1.0× perl + +### Abort condition + +As phases before: any correctness failure or insufficient perf gain triggers revert. + +--- + +## Phase 5 — Cleanup & documentation + +### Goal + +Once Phases 1-4 are stable and green, remove the opt-out env vars (they've served their purpose) and update `dev/design/next_steps.md` to reflect reality. + +### Activities + +- Remove `JPERL_NO_PHASE1`..`JPERL_NO_PHASE4` env var branches. +- Clean up any tombstone branches (`perf/perl-parity-phase-*`). +- Update `dev/design/next_steps.md` §0 tables with final numbers. +- Close-out PR #526's §0 if the numbers justify. + +--- + +## Cumulative expected impact + +Assuming each phase delivers its expected median: + +| Phase | life_bitpacked Mcells/s | vs perl | +|---|---:|---:| +| Start (PR #526 + PR #533) | 8.5 | 2.49× slower | +| After Phase 1 (+3%) | 8.8 | 2.41× slower | +| After Phase 2 (+7%) | 9.4 | 2.25× slower | +| After Phase 3 (+5% on method-heavy; pure-numeric ~unchanged) | 9.5 | 2.23× slower | +| After Phase 4 (+10%) | 10.4 | 2.04× slower | +| perl reference | 21.2 | 1.00× | + +This gets us to roughly **2× perl** on life_bitpacked — significant progress but still not parity. Closing the last 2× is beyond the scope of this plan; it's the RuntimeScalar-boxing + Java-dispatch tax that would require value types / escape-analysis improvements beyond what the current JVM can offer. + +For `benchmark_refcount_bless` (currently 6.6× perl), the expected trajectory is more favorable: + +| Phase | benchmark_refcount_bless ratio | +|---|---:| +| Start | 6.6× perl | +| After Phase 1 | 6.4× | +| After Phase 2 | 5.8× | +| After Phase 3 | ~4.5× | +| After Phase 4 | ~3.8× | + +Still more than 2× perl, reflecting that DESTROY/bless semantics need runtime machinery that C-perl embeds directly in SV. + +## Progress tracking + +### Phase 1 — in progress + +[TO BE FILLED IN DURING EXECUTION] + +### Phase 2 — pending + +### Phase 3 — pending + +### Phase 4 — pending + +### Phase 5 — pending diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitStatement.java b/src/main/java/org/perlonjava/backend/jvm/EmitStatement.java index 4c2395e2d..2e2051aed 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitStatement.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitStatement.java @@ -122,6 +122,15 @@ static void emitScopeExitNullStores(EmitterContext ctx, int scopeIndex, boolean // // JPERL_FORCE_CLEANUP=1 forces cleanupNeeded=true at the // EmitterMethodCreator level for correctness debugging. + // + // Phase R (classic_experiment_finding.md): we EXTEND the existing + // skipMyVarCleanup gate to also suppress MyVarCleanupStack.register + // emission on `my` declarations in EmitVariable. We deliberately + // leave Phase 1/1b (scopeExitCleanup, cleanupHash/Array) and Phase 3 + // (MortalList.flush) emitting unconditionally, per the safety note + // above — those fire DESTROY for refs that entered via @_ even if + // the sub's AST has no bless/weaken/user-sub-call and was marked + // cleanupNeeded=false. boolean skipMyVarCleanup = !ctx.javaClassInfo.cleanupNeeded; // Only emit flush when there are variables that need cleanup. @@ -133,32 +142,36 @@ static void emitScopeExitNullStores(EmitterContext ctx, int scopeIndex, boolean // Phase 1: Run scopeExitCleanup for scalar variables. // This defers refCount decrements for blessed references with DESTROY, // and handles IO fd recycling for anonymous filehandle globs. - for (int idx : scalarIndices) { - ctx.mv.visitVarInsn(Opcodes.ALOAD, idx); - ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, - "org/perlonjava/runtime/runtimetypes/RuntimeScalar", - "scopeExitCleanup", - "(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)V", - false); + if (!MortalList.CLASSIC) { + for (int idx : scalarIndices) { + ctx.mv.visitVarInsn(Opcodes.ALOAD, idx); + ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, + "org/perlonjava/runtime/runtimetypes/RuntimeScalar", + "scopeExitCleanup", + "(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)V", + false); + } } // Phase 1b: Walk hash/array variables for nested blessed references. // When a hash/array goes out of scope, any blessed refs stored inside // (or nested inside sub-containers) need their refCounts decremented. - for (int idx : hashIndices) { - ctx.mv.visitVarInsn(Opcodes.ALOAD, idx); - ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, - "org/perlonjava/runtime/runtimetypes/MortalList", - "scopeExitCleanupHash", - "(Lorg/perlonjava/runtime/runtimetypes/RuntimeHash;)V", - false); - } - for (int idx : arrayIndices) { - ctx.mv.visitVarInsn(Opcodes.ALOAD, idx); - ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, - "org/perlonjava/runtime/runtimetypes/MortalList", - "scopeExitCleanupArray", - "(Lorg/perlonjava/runtime/runtimetypes/RuntimeArray;)V", - false); + if (!MortalList.CLASSIC) { + for (int idx : hashIndices) { + ctx.mv.visitVarInsn(Opcodes.ALOAD, idx); + ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, + "org/perlonjava/runtime/runtimetypes/MortalList", + "scopeExitCleanupHash", + "(Lorg/perlonjava/runtime/runtimetypes/RuntimeHash;)V", + false); + } + for (int idx : arrayIndices) { + ctx.mv.visitVarInsn(Opcodes.ALOAD, idx); + ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, + "org/perlonjava/runtime/runtimetypes/MortalList", + "scopeExitCleanupArray", + "(Lorg/perlonjava/runtime/runtimetypes/RuntimeArray;)V", + false); + } } // Phase 2: Null all my variable slots to help GC collect associated objects. // For anonymous filehandle globs, this makes them unreachable so the @@ -178,7 +191,7 @@ static void emitScopeExitNullStores(EmitterContext ctx, int scopeIndex, boolean // sub never uses bless/weaken/user-sub-calls/etc.), the stack is // guaranteed empty for this sub's lexicals, so the unregister // loop is dead code. Skipping it is the win this fast path buys. - if (!skipMyVarCleanup) { + if (!skipMyVarCleanup && !MortalList.CLASSIC) { for (int idx : allIndices) { ctx.mv.visitVarInsn(Opcodes.ALOAD, idx); ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, @@ -209,7 +222,7 @@ static void emitScopeExitNullStores(EmitterContext ctx, int scopeIndex, boolean // boundaries), not entries that need to be preserved. // Flush when requested (non-sub, non-do blocks) even without my-variables, // because pending entries may exist from inner sub scope exits. - if (flush) { + if (flush && !MortalList.CLASSIC) { ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, "org/perlonjava/runtime/runtimetypes/MortalList", "flush", diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitSubroutine.java b/src/main/java/org/perlonjava/backend/jvm/EmitSubroutine.java index 82c0f27f2..1989cd8a1 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitSubroutine.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitSubroutine.java @@ -99,8 +99,16 @@ public static void emitSubroutine(EmitterContext ctx, SubroutineNode node) { // definition context. Only anonymous subs (my sub, state sub, or true anonymous subs) should // capture variables. This prevents issues like defining 'sub bar::foo' inside a block with // 'our sub foo' from incorrectly capturing the 'our sub' as a closure variable. - // Note: "(eval)" is a special name for eval blocks which should capture variables like anonymous subs - boolean isPackageSub = node.name != null && !node.name.equals("") && !node.name.equals("(eval)"); + // Note: "(eval)" is a special name for eval blocks which should capture variables like anonymous subs. + // + // Exception: named subs defined inside an eval-string DO need to capture outer lexicals. + // This matches Perl 5 semantics: `eval "sub outer_name { \$outer_var }"` closes over \$outer_var. + // Without this exception, Sub::Defer (which eval-compiles deferred subs that close over + // \$undeferred and \$deferred_info) loses its closure captures, leaving \$deferred_info + // unreferenced after defer_sub returns. The weakened %DEFERRED entry then immediately + // clears, and the deferred sub's `goto &\$undeferred` loops into itself forever. + boolean isPackageSub = node.name != null && !node.name.equals("") && !node.name.equals("(eval)") + && !ctx.javaClassInfo.isInEvalString; if (isPackageSub) { // Package subs should not capture any closure variables // They can only access global variables and their parameters diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitVariable.java b/src/main/java/org/perlonjava/backend/jvm/EmitVariable.java index f65948b8a..12bee7c15 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitVariable.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitVariable.java @@ -1542,7 +1542,16 @@ static void handleMyOperator(EmitterVisitor emitterVisitor, OperatorNode node) { // if die propagates through this subroutine without eval. // State/our variables are excluded: state persists across calls, // our is global. register() is a no-op until the first bless(). - if (operator.equals("my")) { + // + // Phase R (classic_experiment_finding.md): skip emission when + // CleanupNeededVisitor proved the enclosing sub has no + // bless/weaken/user-sub-calls — no tracked ref can ever land + // in this my-var, so register/unregister pair is dead code. + // CLASSIC is the global kill switch; cleanupNeeded is the + // per-sub correctness-safe analog. + if (operator.equals("my") + && emitterVisitor.ctx.javaClassInfo.cleanupNeeded + && !org.perlonjava.runtime.runtimetypes.MortalList.CLASSIC) { emitterVisitor.ctx.mv.visitVarInsn(Opcodes.ALOAD, varIndex); emitterVisitor.ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, "org/perlonjava/runtime/runtimetypes/MyVarCleanupStack", diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index e8a6d2754..26dd95882 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,14 +33,14 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "078e0b3d7"; + public static final String gitCommitId = "4a1ad046b"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitDate = "2026-04-21"; + public static final String gitCommitDate = "2026-04-23"; /** * Build timestamp in Perl 5 "Compiled at" format (e.g., "Apr 7 2026 11:20:00"). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 21 2026 23:11:19"; + public static final String buildTimestamp = "Apr 23 2026 10:28:43"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/frontend/analysis/CleanupNeededVisitor.java b/src/main/java/org/perlonjava/frontend/analysis/CleanupNeededVisitor.java index 141483285..954dabfb6 100644 --- a/src/main/java/org/perlonjava/frontend/analysis/CleanupNeededVisitor.java +++ b/src/main/java/org/perlonjava/frontend/analysis/CleanupNeededVisitor.java @@ -83,6 +83,14 @@ public void visit(OperatorNode node) { mark(); return; } + // tie/untie invoke user-written TIESCALAR/TIEHASH/TIEARRAY/UNTIE + // methods which can do bless etc. — treat as user sub call. + // Phase R: without this, tie_scalar.t / tie_array.t regress when + // scopeExitCleanup emission is gated on cleanupNeeded. + if ("tie".equals(node.operator) || "untie".equals(node.operator)) { + mark(); + return; + } if (node.operand != null) node.operand.accept(this); } diff --git a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java index 2a4243c35..7b253f2fa 100644 --- a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java @@ -1299,6 +1299,15 @@ public static ListNode handleNamedSubWithFilter(Parser parser, String subName, S Field field = placeholder.codeObject.getClass().getDeclaredField("__SUB__"); field.set(placeholder.codeObject, codeRef); + // Track closure captures: increment captureCount on each + // captured outer lexical, so scopeExitCleanup knows this + // named sub holds a strong ref to them. Without this, weaken- + // based patterns like Sub::Defer's %DEFERRED registry clear + // immediately after defer_sub returns, causing the deferred + // sub's `goto &$undeferred` to loop into itself forever. + // The anon-sub path already does this inside makeCodeObject(). + RuntimeCode.trackClosureCaptures(placeholder, placeholder.codeObject, generatedClass); + } else if (runtimeCode instanceof InterpretedCode interpretedCode) { // InterpretedCode path - update placeholder in-place (not replace codeRef.value) // This is critical: hash assignments copy RuntimeScalar but share the same diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/MortalList.java b/src/main/java/org/perlonjava/runtime/runtimetypes/MortalList.java index 91ee9fde8..8b08173ad 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/MortalList.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/MortalList.java @@ -26,6 +26,22 @@ public class MortalList { // as a trivially-predicted branch; the JIT will elide them. public static boolean active = true; + /** + * Experiment #3 — cumulative-tax hypothesis. + * When {@code JPERL_CLASSIC=1}, collapse the branch's added refcount/ + * walker/weaken/DESTROY machinery to a no-op where possible, so the + * runtime behaves as close to pre-merge master as we can get with a + * single static flag. Used to measure whether the full master→branch + * regression is recoverable at all by disabling the machinery. + * Breaks DESTROY + weaken + walker semantics; only safe for benchmarks. + */ + public static final boolean CLASSIC = + System.getenv("JPERL_CLASSIC") != null; + + static { + if (CLASSIC) active = false; + } + // List of RuntimeBase references awaiting decrement. // Populated by delete() when removing tracked elements. // Drained at statement boundaries (FREETMPS equivalent). diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/MyVarCleanupStack.java b/src/main/java/org/perlonjava/runtime/runtimetypes/MyVarCleanupStack.java index f1bffaf71..3f5117649 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/MyVarCleanupStack.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/MyVarCleanupStack.java @@ -72,6 +72,7 @@ public static int pushMark() { * @param var the RuntimeScalar, RuntimeHash, or RuntimeArray object */ public static void register(Object var) { + if (MortalList.CLASSIC) return; stack.add(var); // liveCounts is only consulted by ReachabilityWalker.sweepWeakRefs, // which runs only when WeakRefRegistry.weakRefsExist is true. For @@ -100,7 +101,7 @@ public static void register(Object var) { * @param var the RuntimeScalar/Array/Hash previously registered */ public static void unregister(Object var) { - if (var == null) return; + if (var == null || MortalList.CLASSIC) return; // Block-scoped my-vars pop in reverse declaration order, so // scan from the top of the stack for a fast amortized match. for (int i = stack.size() - 1; i >= 0; i--) { diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java index 8286a443e..087f54848 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java @@ -1630,6 +1630,44 @@ public static RuntimeScalar makeCodeObject(Object codeObject) throws Exception { return makeCodeObject(codeObject, null); } + /** + * Track closure captures: iterate all RuntimeScalar fields of the generated + * code class (except the self-referencing __SUB__), increment captureCount + * on each, and store the list on the RuntimeCode for eventual release when + * the CODE ref's refCount drops to 0. + * + *

Called from both {@link #makeCodeObject} (anonymous subs installed + * via {@code sub {...}}) and SubroutineParser (named subs). Without this + * call, named subs defined inside an eval-string leak their closure + * captures — captureCount stays at 0 on the outer lexicals, causing + * scopeExitCleanup to treat them as uncaptured and prematurely decrement + * referent refCounts. That in turn breaks weaken-based patterns like + * Sub::Defer's %DEFERRED registry, which relies on closure-kept-alive + * lexicals to keep weakened hash entries defined until the first call + * to the deferred sub. + */ + public static void trackClosureCaptures(RuntimeCode code, Object codeObject, Class clazz) throws IllegalAccessException { + Field[] allFields = clazz.getDeclaredFields(); + List captured = new ArrayList<>(); + for (Field f : allFields) { + if (f.getType() == RuntimeScalar.class && !"__SUB__".equals(f.getName())) { + RuntimeScalar capturedVar = (RuntimeScalar) f.get(codeObject); + if (capturedVar != null) { + captured.add(capturedVar); + capturedVar.captureCount++; + } + } + } + if (!captured.isEmpty()) { + code.capturedScalars = captured.toArray(new RuntimeScalar[0]); + // Enable refCount tracking for closures with captures. + // When the CODE ref's refCount drops to 0, releaseCaptures() + // fires (via DestroyDispatch.callDestroy), letting captured + // blessed objects run DESTROY. + code.refCount = 0; + } + } + public static RuntimeScalar makeCodeObject(Object codeObject, String prototype) throws Exception { return makeCodeObject(codeObject, prototype, null); } @@ -1673,25 +1711,7 @@ public static RuntimeScalar makeCodeObject(Object codeObject, String prototype, // Each instance field of type RuntimeScalar (except __SUB__) is a // captured lexical variable. We store them so that releaseCaptures() // can decrement blessed ref refCounts when the closure is discarded. - Field[] allFields = clazz.getDeclaredFields(); - List captured = new ArrayList<>(); - for (Field f : allFields) { - if (f.getType() == RuntimeScalar.class && !"__SUB__".equals(f.getName())) { - RuntimeScalar capturedVar = (RuntimeScalar) f.get(codeObject); - if (capturedVar != null) { - captured.add(capturedVar); - capturedVar.captureCount++; - } - } - } - if (!captured.isEmpty()) { - code.capturedScalars = captured.toArray(new RuntimeScalar[0]); - // Enable refCount tracking for closures with captures. - // When the CODE ref's refCount drops to 0, releaseCaptures() - // fires (via DestroyDispatch.callDestroy), letting captured - // blessed objects run DESTROY. - code.refCount = 0; - } + trackClosureCaptures(code, codeObject, clazz); RuntimeScalar codeRef = new RuntimeScalar(code); diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java index 84344bb86..a00ecdc7a 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java @@ -942,6 +942,13 @@ private RuntimeScalar setLarge(RuntimeScalar value) { * Separated to keep setLarge() small enough for JIT inlining of set(). */ private RuntimeScalar setLargeRefCounted(RuntimeScalar value) { + // Experiment #3: master-like classic path. Skip all refcount / + // WeakRefRegistry / MortalList work, just do the assignment. + if (MortalList.CLASSIC) { + this.type = value.type; + this.value = value.value; + return this; + } // Fast path for untracked references (refCount == -1). // Most reference assignments involve untracked objects (named variables, // anonymous arrays/hashes that were never blessed). Skip all refCount @@ -2335,6 +2342,7 @@ private void closeIOOnDrop() { */ public static void scopeExitCleanup(RuntimeScalar scalar) { if (scalar == null) return; + if (MortalList.CLASSIC) return; // Fast path: skip if no special state (most common case for integer/string vars). // When all three conditions are true, the entire method body is a no-op: diff --git a/src/main/perl/lib/B.pm b/src/main/perl/lib/B.pm index 41635b935..39c6e3605 100644 --- a/src/main/perl/lib/B.pm +++ b/src/main/perl/lib/B.pm @@ -307,10 +307,30 @@ package B::NULL { return bless {}, $class; } - sub next { - # NULL is terminal -- return self to prevent infinite loops - return $_[0]; - } + # B::NULL represents the terminal "null op" in an OP chain. Real Perl's + # XS-backed B::NULL returns undef from all accessor methods (via xs magic), + # which is what common optree walkers like Test2::Util::Sub::sub_info rely + # on to detect end-of-chain: + # + # my $op = $cv->START; + # while ($op) { # <- B::NULL must be falsy-returning + # push @lines => $op->line if $op->can('line'); + # last unless $op->can('next'); + # $op = $op->next; # <- must eventually yield undef + # } + # + # Previous implementation returned `$_[0]` (self) from `next`, which kept + # `$op` pinned on B::NULL forever, causing infinite loops and unbounded + # `@all_lines` growth — observable as GC-thrash + apparent hangs in any + # module that introspects sub coderefs (Test2 deep-compare, Hash::Wrap, + # DBIx::Class, Sub::Defer). See dev/design/hash_wrap_triage_plan.md. + sub next { return; } + sub line { return; } + sub file { return; } + sub sibling { return; } + sub first { return; } + sub last { return; } + sub targ { return; } } package B::COP { diff --git a/src/main/perl/lib/CPAN/Config.pm b/src/main/perl/lib/CPAN/Config.pm index 6aa385cbb..21dd5ce64 100644 --- a/src/main/perl/lib/CPAN/Config.pm +++ b/src/main/perl/lib/CPAN/Config.pm @@ -56,6 +56,34 @@ pl: - "--pp" env: PARAMS_VALIDATE_IMPLEMENTATION: PP +YAML + 'Class-XSAccessor.yml' => <<'YAML', +--- +comment: | + PerlOnJava ships a pure-Perl re-implementation of Class::XSAccessor + bundled in the jar (lib/Class/XSAccessor.pm). The upstream CPAN + distribution is XS-only and fails at runtime with "Can't load + loadable object for module Class::XSAccessor: no Java XS + implementation available" because PerlOnJava has no XS loader. + + Without this distroprefs entry, `jcpan -t ` recurses on + Class::XSAccessor as a transitive dependency of Moo / DBIx::Class + / Class::Method::Modifiers / ..., installs the XS version into + ~/.perlonjava/lib/, and shadows the bundled shim — silently + breaking every module that imports Class::XSAccessor at runtime. + + Skip the build/test/install steps entirely; the bundled shim in + the jar is all PerlOnJava needs. +match: + distribution: "^SMUELLER/Class-XSAccessor-" +pl: + commandline: "true" +make: + commandline: "true" +test: + commandline: "true" +install: + commandline: "true" YAML ); diff --git a/src/main/perl/lib/CPAN/Prefs/Class-XSAccessor.yml b/src/main/perl/lib/CPAN/Prefs/Class-XSAccessor.yml new file mode 100644 index 000000000..9fd8f359f --- /dev/null +++ b/src/main/perl/lib/CPAN/Prefs/Class-XSAccessor.yml @@ -0,0 +1,25 @@ +--- +comment: | + PerlOnJava ships a pure-Perl re-implementation of Class::XSAccessor + bundled in the jar (src/main/perl/lib/Class/XSAccessor.pm). The + upstream CPAN distribution is XS-only and fails at runtime with + "Can't load loadable object for module Class::XSAccessor: no Java XS + implementation available" because PerlOnJava has no XS loader. + + Without this distroprefs entry, `jcpan -t SomeModule` can recurse on + Class::XSAccessor as a transitive dependency of Moo/DBIC/etc., install + the XS version into ~/.perlonjava/lib/, and shadow the bundled shim — + breaking every module that imports Class::XSAccessor at runtime. + + Skip the build/test/install steps entirely; the bundled shim is all + PerlOnJava needs. +match: + distribution: "^SMUELLER/Class-XSAccessor-" +pl: + commandline: "true" +make: + commandline: "true" +test: + commandline: "true" +install: + commandline: "true" diff --git a/src/test/java/org/perlonjava/ModuleTestExecutionTest.java b/src/test/java/org/perlonjava/ModuleTestExecutionTest.java index 3b6a012ad..b7b65375d 100644 --- a/src/test/java/org/perlonjava/ModuleTestExecutionTest.java +++ b/src/test/java/org/perlonjava/ModuleTestExecutionTest.java @@ -182,13 +182,27 @@ private void executeModuleTest(String filename) { Path moduleDir = resolveModuleDir(filename); System.setProperty("user.dir", moduleDir.toAbsolutePath().toString()); - String content = new String(inputStream.readAllBytes(), StandardCharsets.UTF_8); + // Read source bytes as ISO-8859-1 so each byte maps 1:1 to a char. + // Perl source files are not always UTF-8 (e.g., Text-CSV/t/55_combi.t + // embeds literal byte 0xE4 for 'ä' in a single-quoted string). + // Strict UTF-8 decoding would replace such bytes with U+FFFD, and + // the Perl compiler would then re-encode that U+FFFD back to its + // 3-byte UTF-8 representation (EF BF BD) in the compiled string. + // ISO-8859-1 passes every byte through unchanged as a char, which + // matches what ./jperl does when reading from the filesystem. + String content = new String(inputStream.readAllBytes(), StandardCharsets.ISO_8859_1); if (content.indexOf('\r') >= 0) { content = content.replace("\r\n", "\n").replace("\r", "\n"); } CompilerOptions options = new CompilerOptions(); options.code = content; + // Mark source as raw bytes so the parser preserves non-ASCII bytes + // (e.g., Latin-1 0xE4) as single-byte chars rather than re-encoding + // them as UTF-8 sequences in compiled string literals. + // Matches what FileUtils.readFileWithEncodingDetection does for + // ./jperl when it detects ISO-8859-1 source. + options.isByteStringSource = true; // Set fileName relative to the module directory (CWD) so $0, FindBin, etc. resolve correctly // e.g., "module/Net-SSLeay/t/local/05_passwd_cb.t" -> "t/local/05_passwd_cb.t" Path moduleDirRel = Paths.get("module", filename.split("[/\\\\]")[1]);