From babe835e9bc596006f15a3b3dafec701995b14f6 Mon Sep 17 00:00:00 2001 From: Lukas Mai Date: Fri, 11 Jul 2025 09:07:00 +0200 Subject: [PATCH 1/4] newFOROP: fix crash when optimizing 2-var for over builtin::indexed OP_ENTERSUB isn't necessarily a LISTOP, apparently, so we can't just grab its op_last. Instead, copy/paste logic from elsewhere in op.c to find the cvop. Also, avoid crashing on "fake" pad entries that represent lexical subs from outer scopes by climbing up the scope chain until we reach a real pad entry. Fixes #23405. --- op.c | 14 +++++++++++--- t/op/for-many.t | 13 +++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/op.c b/op.c index d7aaca7fe73c..6a5f236cdf86 100644 --- a/op.c +++ b/op.c @@ -9671,7 +9671,7 @@ S_op_is_cv_xsub(pTHX_ OP *o, XSUBADDR_t xsub) } case OP_PADCV: - cv = (CV *)PAD_SVl(o->op_targ); + cv = find_lexical_cv(o->op_targ); assert(cv && SvTYPE(cv) == SVt_PVCV); break; @@ -9689,10 +9689,18 @@ S_op_is_cv_xsub(pTHX_ OP *o, XSUBADDR_t xsub) static bool S_op_is_call_to_cv_xsub(pTHX_ OP *o, XSUBADDR_t xsub) { - if(o->op_type != OP_ENTERSUB) + if (o->op_type != OP_ENTERSUB) return false; - OP *cvop = cLISTOPx(cUNOPo->op_first)->op_last; + /* entersub may be a UNOP, not a LISTOP, so we can't just use op_last */ + OP *aop = cUNOPo->op_first; + if (!OpHAS_SIBLING(aop)) { + aop = cUNOPx(aop)->op_first; + } + aop = OpSIBLING(aop); + OP *cvop; + for (cvop = aop; OpHAS_SIBLING(cvop); cvop = OpSIBLING(cvop)) ; + return op_is_cv_xsub(cvop, xsub); } diff --git a/t/op/for-many.t b/t/op/for-many.t index 2f6790aee775..035d1da07e91 100644 --- a/t/op/for-many.t +++ b/t/op/for-many.t @@ -498,4 +498,17 @@ is($continue, 'xx', 'continue reached twice'); is("@have", "Pointy end Up Flamey end Down", 'for my ($one, $two)'); } +# GH #23405 - segfaults when compiling 2-var for loops +{ + my $dummy = sub {}; + for my ($x, $y) (main->$dummy) {} + pass '2-var for does not crash on method calls'; + + my sub dummy {} + sub { + for my ($x, $y) (dummy) {} + }->(); + pass '2-var for does not crash on lexical sub calls'; +} + done_testing(); From 35c000c969e80c7c4ee04808ed39473ac0eba531 Mon Sep 17 00:00:00 2001 From: Lukas Mai Date: Thu, 17 Jul 2025 06:56:53 +0200 Subject: [PATCH 2/4] perf/opcount.t: ensure imported builtins are optimized When I saw (in #23429) that use builtin qw(indexed); sub { for my ($x, $y) (indexed) {} } crashes, but sub { for my ($x, $y) (builtin::indexed) {} } works fine, I realized that optrees for calls to lexically imported subs look different from calls to package subs. This commit make sure both call variants are optimized to direct ops. --- t/perf/opcount.t | 129 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 124 insertions(+), 5 deletions(-) diff --git a/t/perf/opcount.t b/t/perf/opcount.t index d3863690323e..a48db773141c 100644 --- a/t/perf/opcount.t +++ b/t/perf/opcount.t @@ -698,6 +698,21 @@ test_opcount(0, "multiconcat: local assign", # builtin:: function calls should be replaced with efficient op implementations no warnings 'experimental::builtin'; +use builtin qw( + blessed + ceil + false + floor + indexed + is_bool + is_tainted + is_weak + refaddr + reftype + true + unweaken + weaken +); test_opcount(0, "builtin::true/false are replaced with constants", sub { my $x = builtin::true(); my $y = builtin::false() }, @@ -706,6 +721,13 @@ test_opcount(0, "builtin::true/false are replaced with constants", const => 2, }); +test_opcount(0, "imported true/false are replaced with constants", + sub { my $x = true(); my $y = false() }, + { + entersub => 0, + const => 2, + }); + test_opcount(0, "builtin::is_bool is replaced with direct opcode", sub { my $x; my $y; $y = builtin::is_bool($x); 1; }, { @@ -715,6 +737,15 @@ test_opcount(0, "builtin::is_bool is replaced with direct opcode", padsv_store => 1, }); +test_opcount(0, "imported is_bool is replaced with direct opcode", + sub { my $x; my $y; $y = is_bool($x); 1; }, + { + entersub => 0, + is_bool => 1, + padsv => 3, + padsv_store => 1, + }); + test_opcount(0, "builtin::is_bool gets constant-folded", sub { builtin::is_bool(123); }, { @@ -723,6 +754,14 @@ test_opcount(0, "builtin::is_bool gets constant-folded", const => 1, }); +test_opcount(0, "imported is_bool gets constant-folded", + sub { is_bool(123); }, + { + entersub => 0, + is_bool => 0, + const => 1, + }); + test_opcount(0, "builtin::weaken is replaced with direct opcode", sub { my $x = []; builtin::weaken($x); }, { @@ -730,6 +769,13 @@ test_opcount(0, "builtin::weaken is replaced with direct opcode", weaken => 1, }); +test_opcount(0, "imported weaken is replaced with direct opcode", + sub { my $x = []; weaken($x); }, + { + entersub => 0, + weaken => 1, + }); + test_opcount(0, "builtin::unweaken is replaced with direct opcode", sub { my $x = []; builtin::unweaken($x); }, { @@ -737,6 +783,13 @@ test_opcount(0, "builtin::unweaken is replaced with direct opcode", unweaken => 1, }); +test_opcount(0, "imported unweaken is replaced with direct opcode", + sub { my $x = []; unweaken($x); }, + { + entersub => 0, + unweaken => 1, + }); + test_opcount(0, "builtin::is_weak is replaced with direct opcode", sub { builtin::is_weak([]); }, { @@ -744,6 +797,13 @@ test_opcount(0, "builtin::is_weak is replaced with direct opcode", is_weak => 1, }); +test_opcount(0, "imported is_weak is replaced with direct opcode", + sub { is_weak([]); }, + { + entersub => 0, + is_weak => 1, + }); + test_opcount(0, "builtin::blessed is replaced with direct opcode", sub { builtin::blessed([]); }, { @@ -751,6 +811,13 @@ test_opcount(0, "builtin::blessed is replaced with direct opcode", blessed => 1, }); +test_opcount(0, "imported blessed is replaced with direct opcode", + sub { blessed([]); }, + { + entersub => 0, + blessed => 1, + }); + test_opcount(0, "builtin::refaddr is replaced with direct opcode", sub { builtin::refaddr([]); }, { @@ -758,6 +825,13 @@ test_opcount(0, "builtin::refaddr is replaced with direct opcode", refaddr => 1, }); +test_opcount(0, "imported refaddr is replaced with direct opcode", + sub { refaddr([]); }, + { + entersub => 0, + refaddr => 1, + }); + test_opcount(0, "builtin::reftype is replaced with direct opcode", sub { builtin::reftype([]); }, { @@ -765,6 +839,13 @@ test_opcount(0, "builtin::reftype is replaced with direct opcode", reftype => 1, }); +test_opcount(0, "imported reftype is replaced with direct opcode", + sub { reftype([]); }, + { + entersub => 0, + reftype => 1, + }); + my $one_point_five = 1.5; # Prevent const-folding. test_opcount(0, "builtin::ceil is replaced with direct opcode", sub { builtin::ceil($one_point_five); }, @@ -773,6 +854,13 @@ test_opcount(0, "builtin::ceil is replaced with direct opcode", ceil => 1, }); +test_opcount(0, "imported ceil is replaced with direct opcode", + sub { ceil($one_point_five); }, + { + entersub => 0, + ceil => 1, + }); + test_opcount(0, "builtin::floor is replaced with direct opcode", sub { builtin::floor($one_point_five); }, { @@ -780,6 +868,13 @@ test_opcount(0, "builtin::floor is replaced with direct opcode", floor => 1, }); +test_opcount(0, "imported floor is replaced with direct opcode", + sub { floor($one_point_five); }, + { + entersub => 0, + floor => 1, + }); + test_opcount(0, "builtin::is_tainted is replaced with direct opcode", sub { builtin::is_tainted($0); }, { @@ -787,6 +882,13 @@ test_opcount(0, "builtin::is_tainted is replaced with direct opcode", is_tainted => 1, }); +test_opcount(0, "imported is_tainted is replaced with direct opcode", + sub { is_tainted($0); }, + { + entersub => 0, + is_tainted => 1, + }); + # void sassign + padsv combinations are replaced by padsv_store test_opcount(0, "sassign + padsv replaced by padsv_store", sub { my $y; my $z = $y = 3; 1; }, @@ -1014,18 +1116,35 @@ test_opcount(0, "Empty anonhash ref and direct lexical assignment", test_opcount(0, "foreach 2 lexicals on builtin::indexed ARRAY", sub { my @input = (); foreach my ($i, $x) (builtin::indexed @input) { } }, { - entersub => 0, # no call to builtin::indexed + entersub => 0, # no call to builtin::indexed enteriter => 1, - iter => 1, - padav => 2, + iter => 1, + padav => 2, + }); + +test_opcount(0, "foreach 2 lexicals on imported indexed ARRAY", + sub { my @input = (); foreach my ($i, $x) (indexed @input) { } }, + { + entersub => 0, # no call to builtin::indexed + enteriter => 1, + iter => 1, + padav => 2, }); test_opcount(0, "foreach 2 lexicals on builtin::indexed LIST", sub { foreach my ($i, $x) (builtin::indexed qw( x y z )) { } }, { - entersub => 0, # no call to builtin::indexed + entersub => 0, # no call to builtin::indexed + enteriter => 1, + iter => 1, + }); + +test_opcount(0, "foreach 2 lexicals on imported indexed LIST", + sub { foreach my ($i, $x) (indexed qw( x y z )) { } }, + { + entersub => 0, # no call to builtin::indexed enteriter => 1, - iter => 1, + iter => 1, }); # substr with const zero offset and "" replacements From 8166f2fbe6acf8743907c15dda62ffbe2c50bdd5 Mon Sep 17 00:00:00 2001 From: Lukas Mai Date: Sun, 13 Jul 2025 00:04:12 +0200 Subject: [PATCH 3/4] perldelta for the 2-var 'for' crash (#23405) --- pod/perldelta.pod | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 280ac72b07b5..eb5a7dd2268a 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -366,7 +366,29 @@ manager will later use a regex to expand these into links. =item * -XXX +Certain constructs involving a two-variable C loop would crash the perl +compiler in v5.42.0: + + # Two-variable for loop over a list returned from a method call: + for my ($x, $y) (Some::Class->foo()) { ... } + for my ($x, $y) ($object->foo()) { ... } + +and + + # Two-variable for loop over a list returned from a call to a + # lexical(ly imported) subroutine, all inside a lexically scoped + # or anonymous subroutine: + my sub foo { ... } + my $fn = sub { + for my ($x, $y) (foo()) { ... } + }; + + use builtin qw(indexed); # lexical import! + my sub bar { + for my ($x, $y) (indexed(...)) { ... } + } + +These have been fixed. [GH #23405] =back From d87975f1396fec58ff25f0ef678799c47ddbdaeb Mon Sep 17 00:00:00 2001 From: Lukas Mai Date: Mon, 21 Jul 2025 08:07:50 +0200 Subject: [PATCH 4/4] op.c: factor out repeated entersub code ... for locating the beginning of the argument list and the end (which is the entered sub itself) in the op tree. --- op.c | 66 +++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/op.c b/op.c index 6a5f236cdf86..3884b2f2ffb9 100644 --- a/op.c +++ b/op.c @@ -9652,6 +9652,33 @@ Perl_newWHILEOP(pTHX_ I32 flags, I32 debuggable, LOOP *loop, return o; } +#define find_argop_from_entersub(op) S_find_argop_from_entersub(op) +static OP * +S_find_argop_from_entersub(OP *entersubop) { + assert(entersubop != NULL); + + OP *aop = cUNOPx(entersubop)->op_first; + if (!OpHAS_SIBLING(aop)) { + aop = cUNOPx(aop)->op_first; + } + /* move past pushmark */ + aop = OpSIBLING(aop); + + return aop; +} + +#define find_cvop_from_argop(op) S_find_cvop_from_argop(op) +static OP * +S_find_cvop_from_argop(OP *cvop) { + assert(cvop != NULL); + + /* CV is the last argument to entersub */ + while (OpHAS_SIBLING(cvop)) { + cvop = OpSIBLING(cvop); + } + return cvop; +} + #define op_is_cv_xsub(o, xsub) S_op_is_cv_xsub(aTHX_ o, xsub) static bool S_op_is_cv_xsub(pTHX_ OP *o, XSUBADDR_t xsub) @@ -9693,13 +9720,8 @@ S_op_is_call_to_cv_xsub(pTHX_ OP *o, XSUBADDR_t xsub) return false; /* entersub may be a UNOP, not a LISTOP, so we can't just use op_last */ - OP *aop = cUNOPo->op_first; - if (!OpHAS_SIBLING(aop)) { - aop = cUNOPx(aop)->op_first; - } - aop = OpSIBLING(aop); - OP *cvop; - for (cvop = aop; OpHAS_SIBLING(cvop); cvop = OpSIBLING(cvop)) ; + OP *aop = find_argop_from_entersub(o); + OP *cvop = find_cvop_from_argop(aop); return op_is_cv_xsub(cvop, xsub); } @@ -14723,10 +14745,7 @@ Perl_ck_entersub_args_list(pTHX_ OP *entersubop) PERL_ARGS_ASSERT_CK_ENTERSUB_ARGS_LIST; - aop = cUNOPx(entersubop)->op_first; - if (!OpHAS_SIBLING(aop)) - aop = cUNOPx(aop)->op_first; - for (aop = OpSIBLING(aop); OpHAS_SIBLING(aop); aop = OpSIBLING(aop)) { + for (aop = find_argop_from_entersub(entersubop); OpHAS_SIBLING(aop); aop = OpSIBLING(aop)) { /* skip the extra attributes->import() call implicitly added in * something like foo(my $x : bar) */ @@ -14773,7 +14792,7 @@ Perl_ck_entersub_args_proto(pTHX_ OP *entersubop, GV *namegv, SV *protosv) { STRLEN proto_len; const char *proto, *proto_end; - OP *aop, *prev, *cvop, *parent; + OP *aop, *prev, *parent; int optional = 0; I32 arg = 0; I32 contextclass = 0; @@ -14795,7 +14814,7 @@ Perl_ck_entersub_args_proto(pTHX_ OP *entersubop, GV *namegv, SV *protosv) } prev = aop; aop = OpSIBLING(aop); - for (cvop = aop; OpHAS_SIBLING(cvop); cvop = OpSIBLING(cvop)) ; + OP *cvop = find_cvop_from_argop(aop); while (aop != cvop) { OP* o3 = aop; @@ -15030,18 +15049,17 @@ Perl_ck_entersub_args_proto_or_list(pTHX_ OP *entersubop, OP * Perl_ck_entersub_args_core(pTHX_ OP *entersubop, GV *namegv, SV *protosv) { + PERL_ARGS_ASSERT_CK_ENTERSUB_ARGS_CORE; + IV cvflags = SvIVX(protosv); int opnum = cvflags & 0xffff; OP *aop = cUNOPx(entersubop)->op_first; - PERL_ARGS_ASSERT_CK_ENTERSUB_ARGS_CORE; - if (!opnum) { - OP *cvop; if (!OpHAS_SIBLING(aop)) aop = cUNOPx(aop)->op_first; aop = OpSIBLING(aop); - for (cvop = aop; OpSIBLING(cvop); cvop = OpSIBLING(cvop)) ; + OP *cvop = find_cvop_from_argop(aop); if (aop != cvop) { SV *namesv = cv_name((CV *)namegv, NULL, CV_NAME_NOTQUAL); yyerror_pv(form("Too many arguments for %" SVf, @@ -15319,21 +15337,15 @@ S_entersub_alloc_targ(pTHX_ OP * const o) OP * Perl_ck_subr(pTHX_ OP *o) { - OP *aop, *cvop; - CV *cv; - GV *namegv; SV **const_class = NULL; OP *const_op = NULL; PERL_ARGS_ASSERT_CK_SUBR; - aop = cUNOPx(o)->op_first; - if (!OpHAS_SIBLING(aop)) - aop = cUNOPx(aop)->op_first; - aop = OpSIBLING(aop); - for (cvop = aop; OpHAS_SIBLING(cvop); cvop = OpSIBLING(cvop)) ; - cv = rv2cv_op_cv(cvop, RV2CVOPCV_MARK_EARLY); - namegv = cv ? (GV*)rv2cv_op_cv(cvop, RV2CVOPCV_MAYBE_NAME_GV) : NULL; + OP *aop = find_argop_from_entersub(o); + OP *cvop = find_cvop_from_argop(aop); + CV *cv = rv2cv_op_cv(cvop, RV2CVOPCV_MARK_EARLY); + GV *namegv = cv ? (GV*)rv2cv_op_cv(cvop, RV2CVOPCV_MAYBE_NAME_GV) : NULL; o->op_private &= ~1; o->op_private |= (PL_hints & HINT_STRICT_REFS);