Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
dec6a10
fix: %_ strict vars + use lib prepend ordering for Text::CSV support
fglock Apr 3, 2026
09c07ce
fix: @INC ordering + blib support for CPAN module testing
fglock Apr 3, 2026
2371420
fix: bytecode compiler last/next/redo skips do-while to find true loop
fglock Apr 3, 2026
502e909
fix: add bytes:: functions and glob method dispatch for Text::CSV
fglock Apr 3, 2026
de12fa4
docs: update Text::CSV fix plan with Phase 3b/3c results
fglock Apr 3, 2026
19e6e68
fix: bytecode HINT_BYTES parity and raw-bytes DATA section
fglock Apr 3, 2026
bf068d9
fix: logical operator VOID context and PerlIO::get_layers NPE
fglock Apr 3, 2026
66804ca
docs: update Text::CSV fix plan with Phase 4 results and next steps
fglock Apr 3, 2026
236d728
fix: local %hash now saves/restores globalHashes map entry
fglock Apr 3, 2026
2a33690
fix: readline now returns BYTE_STRING for handles without encoding la…
fglock Apr 3, 2026
51b2fe6
docs: update Text::CSV fix plan with Phase 5 results
fglock Apr 3, 2026
cdf53de
fix: untie retains last FETCH value, fix UTF-16/32 encoding layer reads
fglock Apr 3, 2026
d66c6b0
fix: UTF-8 encode wide characters on binary handles, fix utf8::decode…
fglock Apr 3, 2026
b0a5aa9
fix: use bytes regex matching, Latin-1 source encoding detection
fglock Apr 3, 2026
e295ccc
fix: Wide character in print warning, utf8::upgrade preserves content
fglock Apr 3, 2026
871e26b
fix: print reads internal ORS/OFS, not aliased $\ and $, variables
fglock Apr 4, 2026
e90e4c8
fix: preserve gotoLabelPcs in InterpretedCode.withCapturedVars()
fglock Apr 4, 2026
8b8e2b9
fix: preserve BYTE_STRING type through tr/// and substr operations
fglock Apr 4, 2026
6d401b2
fix: comprehensive BYTE_STRING type preservation across string operat…
fglock Apr 4, 2026
9ea6ce2
fix: Encode::decode drops orphan trailing bytes for UTF-16/32
fglock Apr 4, 2026
fd6f04a
docs: update Text::CSV fix plan — Phase 7 complete, 39/40 tests pass
fglock Apr 4, 2026
4c5563c
fix: s/// preserves wide chars, :crlf read avoids over-consuming
fglock Apr 4, 2026
52e212c
docs: update Text::CSV fix plan — Phase 8 regression fixes complete
fglock Apr 4, 2026
37c6077
fix: Unicode property patterns now safe for Pattern.COMMENTS mode
fglock Apr 4, 2026
e102771
fix: resolve regressions in op/anonsub.t, comp/parser_run.t, re/pat_a…
fglock Apr 4, 2026
f90d087
feat: implement namespace::autoclean to actually clean imported funct…
fglock Apr 4, 2026
86a6836
docs: update Text::CSV fix plan — Phase 9 regression fixes + namespac…
fglock Apr 4, 2026
3678c6d
fix: namespace::autoclean preserves companion package methods
fglock Apr 4, 2026
791c0b1
fix: utf8::valid() now returns true for byte strings (matching Perl 5)
fglock Apr 4, 2026
82a5167
fix: suppress spurious warnings in Text::CSV tests
fglock Apr 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/main/java/org/perlonjava/core/Configuration.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public final class Configuration {
* Automatically populated by Gradle/Maven during build.
* DO NOT EDIT MANUALLY - this value is replaced at build time.
*/
public static final String gitCommitId = "b037509d0";
public static final String gitCommitId = "4aafb6057";

/**
* Git commit date of the build (ISO format: YYYY-MM-DD).
Expand Down
27 changes: 27 additions & 0 deletions src/main/java/org/perlonjava/frontend/parser/ParseHeredoc.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@
import org.perlonjava.frontend.lexer.LexerTokenType;
import org.perlonjava.runtime.runtimetypes.PerlCompilerException;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import static org.perlonjava.frontend.parser.StringParser.parseRawString;
import static org.perlonjava.runtime.perlmodule.Strict.HINT_UTF8;

public class ParseHeredoc {
static OperatorNode parseHeredoc(Parser parser, String tokenText) {
Expand Down Expand Up @@ -212,6 +214,16 @@ else if (currentIndex >= tokens.size() ||
String string = content.toString();
if (CompilerOptions.DEBUG_ENABLED) parser.ctx.logDebug("Final heredoc content: <<" + string + ">>");

// Without `use utf8`, convert Unicode chars back to UTF-8 byte values,
// matching Perl 5's treatment of source bytes as Latin-1/octets.
// Skip if source is already ISO-8859-1 (isByteStringSource) — chars already
// represent raw byte values and need no conversion.
if (!parser.ctx.symbolTable.isStrictOptionEnabled(HINT_UTF8)
&& !parser.ctx.compilerOptions.isUnicodeSource
&& !parser.ctx.compilerOptions.isByteStringSource) {
string = convertToOctets(string);
}

// Rewrite the heredoc node, according to the delimiter
Node operand = null;
switch (delimiter) {
Expand Down Expand Up @@ -293,4 +305,19 @@ public static void restoreHeredocStateIfNeeded(Parser parser, List<OperatorNode>
parser.getHeredocNodes().addAll(savedHeredocNodes);
}
}

/**
 * Re-expresses a string as its UTF-8 octets, one Java {@code char} per byte.
 * <p>
 * The input is encoded to UTF-8 and every resulting byte becomes a single
 * character in the range 0x00-0xFF. Pure-ASCII input therefore comes back
 * unchanged, while any character that needs a multi-byte UTF-8 sequence
 * expands into that many characters.
 * <p>
 * This is used for source text compiled without {@code use utf8}, where the
 * literal is expected to hold byte values rather than decoded code points.
 *
 * @param str the decoded text to expand into octets
 * @return a string whose characters are the unsigned UTF-8 byte values of {@code str}
 */
private static String convertToOctets(String str) {
    byte[] encoded = str.getBytes(StandardCharsets.UTF_8);
    // ISO-8859-1 maps each byte value 0x00-0xFF to the char with the same
    // numeric value, which is exactly the unsigned byte-to-char widening.
    return new String(encoded, StandardCharsets.ISO_8859_1);
}
}
5 changes: 3 additions & 2 deletions src/main/java/org/perlonjava/frontend/parser/Variable.java
Original file line number Diff line number Diff line change
Expand Up @@ -925,8 +925,9 @@ public static Node parseBracedVariable(Parser parser, String sigil, boolean isSt
if (TokenUtils.peek(parser).text.equals("}")) {
TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}");

// Issue ambiguity warning if needed
if (isAmbiguous) {
// Issue ambiguity warning if needed (not inside string interpolation,
// matching Perl 5 which only warns in code context)
if (isAmbiguous && !isStringInterpolation) {
String accessType = "";
if (operand instanceof BinaryOperatorNode binOp) {
if (binOp.operator.equals("[")) {
Expand Down
25 changes: 4 additions & 21 deletions src/main/java/org/perlonjava/runtime/perlmodule/Utf8.java
Original file line number Diff line number Diff line change
Expand Up @@ -349,27 +349,10 @@ public static RuntimeList valid(RuntimeArray args, int ctx) {
String string = scalar.toString();

if (scalar.type == BYTE_STRING) {
// For byte strings, check if the bytes form valid UTF-8
// Extract raw byte values and try to decode as UTF-8
byte[] bytes = new byte[string.length()];
for (int i = 0; i < string.length(); i++) {
char c = string.charAt(i);
if (c > 0xFF) {
// Byte string should not contain chars > 0xFF
// This is an inconsistent state
return RuntimeScalarCache.scalarFalse.getList();
}
bytes[i] = (byte) c;
}
CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
try {
decoder.decode(ByteBuffer.wrap(bytes));
return RuntimeScalarCache.scalarTrue.getList();
} catch (CharacterCodingException e) {
return RuntimeScalarCache.scalarFalse.getList();
}
// For byte strings (UTF-8 flag off), Perl always returns true.
// The bytes are not claiming to be UTF-8, so they are considered
// valid in their native encoding (Latin-1/bytes).
return RuntimeScalarCache.scalarTrue.getList();
} else {
// For character strings (UTF-8 flag on), check if all characters are valid
// Unicode code points. Java strings contain UTF-16 code units, which
Expand Down
Loading