fglock · fglock · Jan 13, 2026 · Jan 6, 2026 · Jan 6, 2026 · Jan 7, 2026
diff --git a/.gitignore b/.gitignore
@@ -43,6 +43,8 @@ test_*.pl
 *.tmp
 test_*.tmp
 pack_test_*.log
+*.log
+make_repro_asm.log
 out.json
 test_results.json
 logs/
@@ -52,6 +54,10 @@ logs/
 # But allow patch files in import-perl5/patches/
 !dev/import-perl5/patches/*.patch
 
+skip_control_flow.t
+dev/design/CONTROL_FLOW_FIX_RESULTS.md
+dev/prompts/CONTROL_FLOW_FIX_INSTRUCTIONS.md
+
 # Ignore test artifact files (numbered .txt and .pod files)
 [0-9][0-9][0-9][0-9][0-9][0-9].txt
 [0-9][0-9][0-9][0-9][0-9].pod

diff --git a/dev/design/bytecode_debugging.md b/dev/design/bytecode_debugging.md
@@ -0,0 +1,144 @@
+# Bytecode debugging workflow (ASM verifier failures)
+
+This document describes the workflow used to diagnose and fix JVM bytecode verification / ASM frame computation failures in PerlOnJava generated classes (typically `org/perlonjava/anonNNN`).
+
+## Symptoms
+
+Typical failures during compilation / class generation:
+
+- `java.lang.NegativeArraySizeException: -4`
+  - Usually from ASM `ClassWriter.COMPUTE_FRAMES` when stack map frame computation fails.
+- `org.objectweb.asm.tree.analysis.AnalyzerException`
+  - Examples:
+    - `Incompatible stack heights`
+    - `Cannot pop operand off an empty stack.`
+- `ArrayIndexOutOfBoundsException` inside `org.objectweb.asm.Frame.merge`
+
+These errors generally mean the generated method has invalid stack behavior at a control-flow merge.
+
+## Key idea
+
+PerlOnJava generates Java bytecode with ASM. The JVM verifier requires consistent stack-map frames at merge points.
+
+The most common root causes are:
+
+- A control-flow edge (e.g. `GOTO returnLabel`) is taken while **some unrelated value is still on the JVM operand stack**, or
+- The compiler’s own stack tracking drifts and emits `POP` instructions that do not correspond to reality.
+
+In practice this happens when a subexpression may perform **non-local control flow** (tagged returns), such as:
+
+- `return`
+- `next` / `last` / `redo` outside the immediate loop
+- `goto &NAME` tail calls
+
+## Enabling diagnostics
+
+### Environment variables
+
+- `JPERL_LARGECODE=refactor`
+  - Enables large-block refactoring to avoid `MethodTooLargeException`.
+- `JPERL_ASM_DEBUG=1`
+  - Enables detailed debug output when ASM frame computation fails.
+- `JPERL_ASM_DEBUG_CLASS=anonNNN` (optional)
+  - Restricts debug output to matching generated classes.
+- `JPERL_OPTS='-Xmx512m'` (example)
+  - Controls JVM options for the launcher.
+
+### Typical repro command
+
+Run from `perl5_t/` so that `./test.pl` and relative includes resolve:
+
+```
+JPERL_LARGECODE=refactor \
+JPERL_ASM_DEBUG=1 \
+JPERL_OPTS='-Xmx512m' \
+../jperl t/op/pack.t \
+  > /tmp/perlonjava_pack_out.log \
+  2> /tmp/perlonjava_pack_err.log
+```
+
+## Reading the debug output
+
+When `JPERL_ASM_DEBUG=1` is enabled, `EmitterMethodCreator` prints:
+
+- The failing generated class name: `org/perlonjava/anonNNN`
+- The AST index and source file name (if available)
+- The result of a verifier run, which pinpoints the:
+  - method signature
+  - failing instruction index
+
+Look for:
+
+- `ASM frame compute crash in generated class: org/perlonjava/anonNNN ...`
+- `BasicInterpreter failure in org/perlonjava/anonNNN.apply(... ) at instruction K`
+
+Then inspect the printed instruction window:
+
+- Identify the failing instruction `K`.
+- Look for the **last control-flow jump** into the label after `K`.
+- Compare the operand stack shape across predecessors (often printed as `frame stack sizes`).
+
+## Mapping failures back to emitters
+
+Common patterns:
+
+### 1) Extra value left on operand stack
+
+A typical signature:
+
+- One predecessor arrives at `returnLabel` with stack size `2` (e.g. `[result, extra]`)
+- Other predecessors arrive with stack size `1` (`[result]`)
+
+This is most often due to evaluating a left operand and keeping it on-stack while evaluating a right operand that may jump away.
+
+Fix strategy:
+
+- **Spill intermediate values to locals** before evaluating anything that might trigger tagged control flow.
+
+### 2) Over-eager `POP` emission
+
+A typical signature:
+
+- `AnalyzerException: Cannot pop operand off an empty stack.`
+- The instruction window shows multiple `POP`s without corresponding pushes.
+
+Fix strategy:
+
+- Avoid emitting `POP`s based on unreliable stack accounting.
+- Prefer spilling to locals at the point where the compiler knows the stack is clean.
+
+## Places to look in the code
+
+- `src/main/java/org/perlonjava/codegen/EmitterMethodCreator.java`
+  - Owns:
+    - frame computation (`COMPUTE_FRAMES`)
+    - the no-frames diagnostic pass
+    - `BasicInterpreter` analysis output
+- `src/main/java/org/perlonjava/codegen/EmitSubroutine.java`
+  - Emits `RuntimeCode.apply(...)`
+  - Tagged-return handling at call sites
+- `src/main/java/org/perlonjava/codegen/EmitControlFlow.java`
+  - Emits bytecode for `return`, `next/last/redo`, `goto`.
+- Various operator emitters that evaluate LHS then RHS:
+  - Any that keep LHS on stack across RHS evaluation are suspects.
+
+## Practical debugging loop
+
+1. Reproduce with `JPERL_ASM_DEBUG=1`.
+2. Record failing `anonNNN` and instruction index.
+3. Identify whether it’s:
+   - stack height mismatch at merge, or
+   - stack underflow from bad POPs.
+4. Patch the responsible emitter (usually by spilling intermediates).
+5. Rebuild jar:
+
+```
+./gradlew shadowJar
+```
+
+6. Re-run the test.
+
+## Notes
+
+- `jperl` runs `target/perlonjava-3.0.0.jar`. Rebuild after every change; otherwise you may be debugging stale code.
+- `JPERL_ASM_DEBUG_CLASS` is useful to avoid massive logs during large tests.
diff --git a/dev/tools/perl_test_runner.pl b/dev/tools/perl_test_runner.pl
@@ -251,6 +251,8 @@ sub run_single_test {
         | op/sprintf.t
         | base/lex.t }x
         ? "warn" : "";
+    local $ENV{JPERL_LARGECODE} = $test_file =~ m{opbasic/concat\.t$}
+        ? "refactor" : "";
     local $ENV{JPERL_OPTS} = $test_file =~ m{
           re/pat.t
         | op/repeat.t
@@ -315,6 +317,7 @@ sub run_single_test {
     # Capture output with timeout
     my $output = '';
     my $exit_code = 0;
+    my $raw_output_path;
 
     if ($timeout_cmd) {
         # Use external timeout
@@ -334,6 +337,12 @@ sub run_single_test {
         }
     }
 
+    $raw_output_path = "/tmp/perl_test_output_$$" . "_" . time() . "_" . int(rand(1000)) . ".log";
+    if (open my $fh, '>', $raw_output_path) {
+        print $fh $output;
+        close $fh;
+    }
+
     # Restore directory
     chdir($old_dir);
 
@@ -345,10 +354,13 @@ sub run_single_test {
             planned_tests => 0, actual_tests_run => 0, incomplete_tests => 0,
             skip_count => 0, todo_count => 0,
             errors => ['Test timed out'], missing_features => []
+            , raw_output_path => $raw_output_path
         };
     }
 
-    return parse_tap_output($output, $exit_code);
+    my $result = parse_tap_output($output, $exit_code);
+    $result->{raw_output_path} = $raw_output_path;
+    return $result;
 }
 
 sub start_test_job {

diff --git a/src/main/java/org/perlonjava/CompilerOptions.java b/src/main/java/org/perlonjava/CompilerOptions.java
@@ -59,6 +59,8 @@ public class CompilerOptions implements Cloneable {
     public StringBuilder rudimentarySwitchAssignments = null; // Variable assignments from -s
     public boolean discardLeadingGarbage = false; // For -x
     public boolean isUnicodeSource = false; // Set to true for UTF-16/UTF-32 source files
+    public boolean isEvalbytes = false; // Set to true for evalbytes context - treats strings as raw bytes
+    public boolean isByteStringSource = false; // Set to true when parsing source that originates from a BYTE_STRING scalar (raw bytes)
     public boolean taintMode = false; // For -T
     public boolean allowUnsafeOperations = false; // For -U
     public boolean runUnderDebugger = false; // For -d

diff --git a/src/main/java/org/perlonjava/astnode/AbstractNode.java b/src/main/java/org/perlonjava/astnode/AbstractNode.java
@@ -18,6 +18,14 @@ public abstract class AbstractNode implements Node {
     // Lazy initialization - only created when first annotation is set
     public Map<String, Object> annotations;
 
+    private int internalAnnotationFlags;
+    private static final int FLAG_BLOCK_ALREADY_REFACTORED = 1;
+    private static final int FLAG_QUEUED_FOR_REFACTOR = 2;
+    private static final int FLAG_CHUNK_ALREADY_REFACTORED = 4;
+
+    private int cachedBytecodeSize = Integer.MIN_VALUE;
+    private byte cachedHasAnyControlFlow = -1;
+
     @Override
     public int getIndex() {
         return tokenIndex;
@@ -47,13 +55,52 @@ public String toString() {
     }
 
     public void setAnnotation(String key, Object value) {
+        if (value instanceof Boolean boolVal && boolVal) {
+            if ("blockAlreadyRefactored".equals(key)) {
+                internalAnnotationFlags |= FLAG_BLOCK_ALREADY_REFACTORED;
+                return;
+            }
+            if ("queuedForRefactor".equals(key)) {
+                internalAnnotationFlags |= FLAG_QUEUED_FOR_REFACTOR;
+                return;
+            }
+            if ("chunkAlreadyRefactored".equals(key)) {
+                internalAnnotationFlags |= FLAG_CHUNK_ALREADY_REFACTORED;
+                return;
+            }
+        }
         if (annotations == null) {
             annotations = new HashMap<>();
         }
         annotations.put(key, value);
     }
 
+    public Integer getCachedBytecodeSize() { // returns null while no size has been cached
+        return cachedBytecodeSize == Integer.MIN_VALUE ? null : cachedBytecodeSize; // Integer.MIN_VALUE is the field's initial "unset" sentinel
+    }
+
+    public void setCachedBytecodeSize(int size) {
+        this.cachedBytecodeSize = size; // storing Integer.MIN_VALUE makes getCachedBytecodeSize() report null again
+    }
+
+    public Boolean getCachedHasAnyControlFlow() { // tri-state: null = not cached yet, otherwise the cached answer
+        return cachedHasAnyControlFlow < 0 ? null : cachedHasAnyControlFlow != 0; // field starts at -1 (unset); the setter stores 0 or 1
+    }
+
+    public void setCachedHasAnyControlFlow(boolean hasAnyControlFlow) {
+        this.cachedHasAnyControlFlow = (byte) (hasAnyControlFlow ? 1 : 0); // 1 = true, 0 = false; never writes the -1 "unset" value
+    }
+
     public Object getAnnotation(String key) {
+        if ("blockAlreadyRefactored".equals(key)) { // flag-backed key: answered from internalAnnotationFlags, the map is not consulted
+            return (internalAnnotationFlags & FLAG_BLOCK_ALREADY_REFACTORED) != 0; // boxed Boolean, so never null for this key
+        }
+        if ("queuedForRefactor".equals(key)) { // flag-backed key, same handling as above
+            return (internalAnnotationFlags & FLAG_QUEUED_FOR_REFACTOR) != 0; // false when the bit was never set
+        }
+        if ("chunkAlreadyRefactored".equals(key)) { // flag-backed key, same handling as above
+            return (internalAnnotationFlags & FLAG_CHUNK_ALREADY_REFACTORED) != 0; // false when the bit was never set
+        }
         return annotations == null ? null : annotations.get(key);
     }
 

diff --git a/src/main/java/org/perlonjava/astnode/CompilerFlagNode.java b/src/main/java/org/perlonjava/astnode/CompilerFlagNode.java
@@ -7,7 +7,7 @@
  * compiler flags such as warnings, features, and strict options.
  */
 public class CompilerFlagNode extends AbstractNode {
-    private final int warningFlags;
+    private final java.util.BitSet warningFlags;
     private final int featureFlags;
     private final int strictOptions;
 
@@ -19,8 +19,8 @@ public class CompilerFlagNode extends AbstractNode {
      * @param strictOptions the bitmask representing the state of strict options
      * @param tokenIndex    the index of the token in the source code
      */
-    public CompilerFlagNode(int warningFlags, int featureFlags, int strictOptions, int tokenIndex) {
-        this.warningFlags = warningFlags;
+    public CompilerFlagNode(java.util.BitSet warningFlags, int featureFlags, int strictOptions, int tokenIndex) {
+        this.warningFlags = (java.util.BitSet) warningFlags.clone();
         this.featureFlags = featureFlags;
         this.strictOptions = strictOptions;
         this.tokenIndex = tokenIndex;
@@ -31,7 +31,7 @@ public CompilerFlagNode(int warningFlags, int featureFlags, int strictOptions, i
      *
      * @return the warning flags bitmask
      */
-    public int getWarningFlags() {
+    public java.util.BitSet getWarningFlags() { // NOTE(review): returns the internal BitSet uncloned, whereas the constructor clones its argument; a caller that mutates the result would alter this node — confirm callers copy
         return warningFlags;
     }