From 6d71a5b0f69ab7cef504bd1067081e461df28093 Mon Sep 17 00:00:00 2001 From: Scott Myron Date: Thu, 3 Jul 2025 22:15:19 -0500 Subject: [PATCH 1/9] Work in progress - Initial support for SIMD in the java module. --- Rakefile | 7 +- java/src/json/ext/EscapeScanner.java | 75 +++++++++ java/src/json/ext/Generator.java | 2 +- java/src/json/ext/StringEncoder.java | 145 +++++++++++++++--- .../src/json/ext/VectorizedEscapeScanner.java | 57 +++++++ 5 files changed, 260 insertions(+), 26 deletions(-) create mode 100644 java/src/json/ext/EscapeScanner.java create mode 100644 java/src/json/ext/VectorizedEscapeScanner.java diff --git a/Rakefile b/Rakefile index 5fc7fa6d..714f2836 100644 --- a/Rakefile +++ b/Rakefile @@ -68,7 +68,7 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby' classpath = (Dir['java/lib/*.jar'] << 'java/src' << JRUBY_JAR) * ':' obj = src.sub(/\.java\Z/, '.class') file obj => src do - sh 'javac', '-classpath', classpath, '-source', '1.8', '-target', '1.8', src + sh 'javac', '--enable-preview', '--add-modules', 'jdk.incubator.vector', '-classpath', classpath, '-source', '21', '-target', '21', src end JAVA_CLASSES << obj end @@ -117,11 +117,14 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby' generator_classes = FileList[ "json/ext/ByteList*.class", "json/ext/OptionsReader*.class", + "json/ext/EscapeScanner*.class", "json/ext/Generator*.class", "json/ext/RuntimeInfo*.class", "json/ext/StringEncoder*.class", - "json/ext/Utils*.class" + "json/ext/Utils*.class", + "json/ext/VectorizedEscapeScanner*.class" ] + puts "Creating generator jar with classes: #{generator_classes.join(', ')}" sh 'jar', 'cf', File.basename(JRUBY_GENERATOR_JAR), *generator_classes mv File.basename(JRUBY_GENERATOR_JAR), File.dirname(JRUBY_GENERATOR_JAR) end diff --git a/java/src/json/ext/EscapeScanner.java b/java/src/json/ext/EscapeScanner.java new file mode 100644 index 00000000..7e7aeb18 --- /dev/null +++ b/java/src/json/ext/EscapeScanner.java @@ -0,0 +1,75 @@ +package json.ext; + 
+import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.util.Optional; + +interface EscapeScanner { + static class State { + byte[] ptrBytes; + int ptr; + int len; + int pos; + int beg; + int ch; + } + + static class VectorSupport { + static Constructor vectorizedEscapeScannerConstructor = null; + + static { + Optional vectorModule = ModuleLayer.boot().findModule("jdk.incubator.vector"); + if (vectorModule.isPresent()) { + try { + Class vectorEscapeScannerClass = EscapeScanner.class.getClassLoader().loadClass("json.ext.VectorizedEscapeScanner"); + vectorizedEscapeScannerConstructor = vectorEscapeScannerClass.getDeclaredConstructor(); + } catch (ClassNotFoundException | NoSuchMethodException e) { + // Fallback to the ScalarEscapeScanner if we cannot load the VectorizedEscapeScanner. + System.err.println("Failed to load VectorizedEscapeScanner, falling back to ScalarEscapeScanner: " + e.getMessage()); + } + } + } + } + + boolean scan(EscapeScanner.State state) throws java.io.IOException; + + public static EscapeScanner basicScanner() { + if (VectorSupport.vectorizedEscapeScannerConstructor != null) { + try { + // Attempt to instantiate the vectorized escape scanner if available. 
+ return (EscapeScanner) VectorSupport.vectorizedEscapeScannerConstructor.newInstance(); + } catch (InstantiationException | IllegalAccessException | InvocationTargetException e) { + System.err.println("Failed to instantiate VectorizedEscapeScanner, falling back to ScalarEscapeScanner: " + e.getMessage()); + } + + } + + return new ScalarEscapeScanner(StringEncoder.ESCAPE_TABLE); + } + + public static EscapeScanner create(byte[] escapeTable) { + return new ScalarEscapeScanner(escapeTable); + } + + public static class ScalarEscapeScanner implements EscapeScanner { + private final byte[] escapeTable; + + public ScalarEscapeScanner(byte[] escapeTable) { + this.escapeTable = escapeTable; + } + + @Override + public boolean scan(EscapeScanner.State state) throws java.io.IOException { + while (state.pos < state.len) { + state.ch = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos]); + int ch_len = escapeTable[state.ch]; + if (ch_len > 0) { + return true; + } + state.pos++; + } + return false; + } + + } +} diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index 85250920..45f68e07 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -232,7 +232,7 @@ public StringEncoder getStringEncoder(ThreadContext context) { GeneratorState state = getState(context); stringEncoder = state.asciiOnly() ? new StringEncoderAsciiOnly(state.scriptSafe()) : - new StringEncoder(state.scriptSafe()); + state.scriptSafe() ? 
StringEncoder.scriptSafeEncoder() : StringEncoder.basicEncoder(); } return stringEncoder; } diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java index d178d0bd..6e34bcee 100644 --- a/java/src/json/ext/StringEncoder.java +++ b/java/src/json/ext/StringEncoder.java @@ -5,6 +5,10 @@ */ package json.ext; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; + import org.jcodings.Encoding; import org.jcodings.specific.ASCIIEncoding; import org.jcodings.specific.USASCIIEncoding; @@ -17,9 +21,9 @@ import org.jruby.util.ByteList; import org.jruby.util.StringSupport; -import java.io.IOException; -import java.io.OutputStream; -import java.nio.charset.StandardCharsets; +import jdk.incubator.vector.ByteVector; +import jdk.incubator.vector.VectorSpecies; +import json.ext.VectorizedEscapeScanner; /** * An encoder that reads from the given source and outputs its representation @@ -130,7 +134,7 @@ class StringEncoder extends ByteListTranscoder { new byte[] {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; - StringEncoder(boolean scriptSafe) { + private StringEncoder(boolean scriptSafe) { this(scriptSafe ? 
SCRIPT_SAFE_ESCAPE_TABLE : ESCAPE_TABLE); } @@ -138,6 +142,14 @@ class StringEncoder extends ByteListTranscoder { this.escapeTable = escapeTable; } + public static StringEncoder scriptSafeEncoder() { + return new StringEncoder(SCRIPT_SAFE_ESCAPE_TABLE); + } + + public static StringEncoder basicEncoder() { + return new StringEncoder(ESCAPE_TABLE); + } + // C: generate_json_string void generate(ThreadContext context, RubyString object, OutputStream buffer) throws IOException { object = ensureValidEncoding(context, object); @@ -198,41 +210,89 @@ private static RubyString tryWeirdEncodings(ThreadContext context, RubyString st return str; } + boolean searchEscape(EscapeScanner.State state) throws IOException { + byte[] escapeTable = StringEncoder.this.escapeTable; + + while (state.pos < state.len) { + state.ch = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos]); + int ch_len = escapeTable[state.ch]; + + if (ch_len > 0) { + return true; + } + + state.pos++; + } + + return false; + } + + void encodeBasic(ByteList src) throws IOException { + EscapeScanner.State state = new EscapeScanner.State(); + state.ptrBytes = src.unsafeBytes(); + state.ptr = src.begin(); + state.len = src.realSize(); + state.beg = 0; + state.pos = 0; + + byte[] hexdig = HEX; + byte[] scratch = aux; + + EscapeScanner scanner = EscapeScanner.basicScanner(); + + while(scanner.scan(state)) { + int ch = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos]); + state.beg = state.pos = flushPos(state.pos, state.beg, state.ptrBytes, state.ptr, 1); + escapeAscii(ch, scratch, hexdig); + } + + if (state.beg < state.len) { + append(state.ptrBytes, state.ptr + state.beg, state.len - state.beg); + } + } + // C: convert_UTF8_to_JSON void encode(ByteList src) throws IOException { + if (this.escapeTable == StringEncoder.ESCAPE_TABLE) { + encodeBasic(src); + return; + } + byte[] hexdig = HEX; byte[] scratch = aux; byte[] escapeTable = this.escapeTable; - byte[] ptrBytes = src.unsafeBytes(); - int ptr = 
src.begin(); - int len = src.realSize(); - - int beg = 0; - int pos = 0; - - while (pos < len) { - int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]); + EscapeScanner.State state = new EscapeScanner.State(); + state.ptrBytes = src.unsafeBytes(); + state.ptr = src.begin(); + state.len = src.realSize(); + state.beg = 0; + state.pos = 0; + + while(searchEscape(state)) { + // We found an escape character, so we need to flush up to this point + // and then handle the escape character. + state.beg = flushPos(state.pos, state.beg, state.ptrBytes, state.ptr, 0); + int ch = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos]); int ch_len = escapeTable[ch]; - /* JSON encoding */ if (ch_len > 0) { switch (ch_len) { case 9: { - beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1); + state.beg = state.pos = flushPos(state.pos, state.beg, state.ptrBytes, state.ptr, 1); escapeAscii(ch, scratch, hexdig); break; } case 11: { - int b2 = Byte.toUnsignedInt(ptrBytes[ptr + pos + 1]); + int b2 = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos + 1]); if (b2 == 0x80) { - int b3 = Byte.toUnsignedInt(ptrBytes[ptr + pos + 2]); + int b3 = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos + 2]); if (b3 == 0xA8) { - beg = pos = flushPos(pos, beg, ptrBytes, ptr, 3); + state.beg = state.pos = flushPos(state.pos, state.beg, state.ptrBytes, state.ptr, 3); append(BACKSLASH_U2028, 0, 6); break; } else if (b3 == 0xA9) { - beg = pos = flushPos(pos, beg, ptrBytes, ptr, 3); + state.beg = state.pos = flushPos(state.pos, state.beg, state.ptrBytes, state.ptr, 3); append(BACKSLASH_U2029, 0, 6); break; } @@ -241,16 +301,55 @@ void encode(ByteList src) throws IOException { // fallthrough } default: - pos += ch_len; + state.pos += ch_len; break; } } else { - pos++; + // This should be unreachable. 
+ state.pos++; } } - if (beg < len) { - append(ptrBytes, ptr + beg, len - beg); + // while (state.pos < state.len) { + // int ch = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos]); + // int ch_len = escapeTable[ch]; + // /* JSON encoding */ + + // if (ch_len > 0) { + // switch (ch_len) { + // case 9: { + // state.beg = state.pos = flushPos(state.pos, state.beg, state.ptrBytes, state.ptr, 1); + // escapeAscii(ch, scratch, hexdig); + // break; + // } + // case 11: { + // int b2 = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos + 1]); + // if (b2 == 0x80) { + // int b3 = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos + 2]); + // if (b3 == 0xA8) { + // state.beg = state.pos = flushPos(state.pos, state.beg, state.ptrBytes, state.ptr, 3); + // append(BACKSLASH_U2028, 0, 6); + // break; + // } else if (b3 == 0xA9) { + // state.beg = state.pos = flushPos(state.pos, state.beg, state.ptrBytes, state.ptr, 3); + // append(BACKSLASH_U2029, 0, 6); + // break; + // } + // } + // ch_len = 3; + // // fallthrough + // } + // default: + // state.pos += ch_len; + // break; + // } + // } else { + // state.pos++; + // } + // } + + if (state.beg < state.len) { + append(state.ptrBytes, state.ptr + state.beg, state.len - state.beg); } } diff --git a/java/src/json/ext/VectorizedEscapeScanner.java b/java/src/json/ext/VectorizedEscapeScanner.java new file mode 100644 index 00000000..ff7cd747 --- /dev/null +++ b/java/src/json/ext/VectorizedEscapeScanner.java @@ -0,0 +1,57 @@ +package json.ext; + +import java.io.IOException; + +import jdk.incubator.vector.ByteVector; +import jdk.incubator.vector.VectorMask; +import jdk.incubator.vector.VectorOperators; +import jdk.incubator.vector.VectorSpecies; + +public class VectorizedEscapeScanner implements EscapeScanner { + public static EscapeScanner.ScalarEscapeScanner FALLBACK = new EscapeScanner.ScalarEscapeScanner(StringEncoder.ESCAPE_TABLE); + + // private VectorMask needsEscape = null; + // private int chunkStart = 0; + + 
@Override + public boolean scan(State state) throws IOException { + VectorSpecies species = ByteVector.SPECIES_PREFERRED; + + // if (needsEscape != null) { + // if (needsEscape.anyTrue()) { + // int firstEscapeIndex = needsEscape.firstTrue(); + // needsEscape = needsEscape.andNot(VectorMask.fromLong(species, 1L << firstEscapeIndex)); + // state.pos = chunkStart + firstEscapeIndex; + // return true; + // } else { + // needsEscape = null; + // } + // } + + while ((state.ptr + state.pos) + species.length() < state.len) { + ByteVector chunk = ByteVector.fromArray(species, state.ptrBytes, state.ptr + state.pos); + ByteVector zero = ByteVector.broadcast(species, 0); + + // bytes are unsigned in java, so we need to check for negative values + // to determine if we have a byte that is less than 0 (>= 128). + VectorMask negative = zero.lt(chunk); + + VectorMask tooLowOrDblQuote = chunk.lanewise(VectorOperators.XOR, ByteVector.broadcast(species, 2)) + .lt(ByteVector.broadcast(species, 33)); + + VectorMask needsEscape = chunk.eq(ByteVector.broadcast(species, '\\')).or(tooLowOrDblQuote).and(negative); + if (needsEscape.anyTrue()) { + // chunkStart = state.ptr + state.pos; + int firstEscapeIndex = needsEscape.firstTrue(); + // Clear the bit at firstEscapeIndex to avoid scanning the same byte again + // needsEscape = needsEscape.andNot(VectorMask.fromLong(species, 1L << firstEscapeIndex)); + state.pos += firstEscapeIndex; + return true; + } + + state.pos += species.length(); + } + + return FALLBACK.scan(state); + } +} From 55aee212ceab0da4a0d49eeaef0c6c2a656e4a4b Mon Sep 17 00:00:00 2001 From: Scott Myron Date: Sat, 5 Jul 2025 12:08:41 -0500 Subject: [PATCH 2/9] Updated the vectorized scanner to more closely match the C implementation. 
--- java/src/json/ext/EscapeScanner.java | 31 +++++---- java/src/json/ext/StringEncoder.java | 13 ++-- .../src/json/ext/VectorizedEscapeScanner.java | 63 +++++++++++++------ 3 files changed, 70 insertions(+), 37 deletions(-) diff --git a/java/src/json/ext/EscapeScanner.java b/java/src/json/ext/EscapeScanner.java index 7e7aeb18..7b407ec0 100644 --- a/java/src/json/ext/EscapeScanner.java +++ b/java/src/json/ext/EscapeScanner.java @@ -15,33 +15,42 @@ static class State { } static class VectorSupport { - static Constructor vectorizedEscapeScannerConstructor = null; + static final EscapeScanner VECTORIZED_ESCAPE_SCANNER; static { Optional vectorModule = ModuleLayer.boot().findModule("jdk.incubator.vector"); + EscapeScanner scanner = null; if (vectorModule.isPresent()) { try { Class vectorEscapeScannerClass = EscapeScanner.class.getClassLoader().loadClass("json.ext.VectorizedEscapeScanner"); - vectorizedEscapeScannerConstructor = vectorEscapeScannerClass.getDeclaredConstructor(); - } catch (ClassNotFoundException | NoSuchMethodException e) { + Constructor vectorizedEscapeScannerConstructor = vectorEscapeScannerClass.getDeclaredConstructor(); + scanner = (EscapeScanner) vectorizedEscapeScannerConstructor.newInstance(); + } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException e) { // Fallback to the ScalarEscapeScanner if we cannot load the VectorizedEscapeScanner. System.err.println("Failed to load VectorizedEscapeScanner, falling back to ScalarEscapeScanner: " + e.getMessage()); + scanner = null; } + } + VECTORIZED_ESCAPE_SCANNER = scanner; } } boolean scan(EscapeScanner.State state) throws java.io.IOException; - public static EscapeScanner basicScanner() { - if (VectorSupport.vectorizedEscapeScannerConstructor != null) { - try { - // Attempt to instantiate the vectorized escape scanner if available. 
- return (EscapeScanner) VectorSupport.vectorizedEscapeScannerConstructor.newInstance(); - } catch (InstantiationException | IllegalAccessException | InvocationTargetException e) { - System.err.println("Failed to instantiate VectorizedEscapeScanner, falling back to ScalarEscapeScanner: " + e.getMessage()); - } + default State createState(byte[] ptrBytes, int ptr, int len, int beg) { + State state = new State(); + state.ptrBytes = ptrBytes; + state.ptr = ptr; + state.len = len; + state.beg = beg; + state.pos = 0; // Start scanning from the beginning of the segment + return state; + } + public static EscapeScanner basicScanner() { + if (VectorSupport.VECTORIZED_ESCAPE_SCANNER != null) { + return VectorSupport.VECTORIZED_ESCAPE_SCANNER; } return new ScalarEscapeScanner(StringEncoder.ESCAPE_TABLE); diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java index 6e34bcee..496a9007 100644 --- a/java/src/json/ext/StringEncoder.java +++ b/java/src/json/ext/StringEncoder.java @@ -228,17 +228,18 @@ boolean searchEscape(EscapeScanner.State state) throws IOException { } void encodeBasic(ByteList src) throws IOException { - EscapeScanner.State state = new EscapeScanner.State(); - state.ptrBytes = src.unsafeBytes(); - state.ptr = src.begin(); - state.len = src.realSize(); - state.beg = 0; - state.pos = 0; + // EscapeScanner.State state = new EscapeScanner.State(); + // state.ptrBytes = src.unsafeBytes(); + // state.ptr = src.begin(); + // state.len = src.realSize(); + // state.beg = 0; + // state.pos = 0; byte[] hexdig = HEX; byte[] scratch = aux; EscapeScanner scanner = EscapeScanner.basicScanner(); + EscapeScanner.State state = scanner.createState(src.unsafeBytes(), src.begin(), src.realSize(), 0); while(scanner.scan(state)) { int ch = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos]); diff --git a/java/src/json/ext/VectorizedEscapeScanner.java b/java/src/json/ext/VectorizedEscapeScanner.java index ff7cd747..5bd43b52 100644 --- 
a/java/src/json/ext/VectorizedEscapeScanner.java +++ b/java/src/json/ext/VectorizedEscapeScanner.java @@ -1,32 +1,31 @@ package json.ext; import java.io.IOException; +import javax.naming.directory.NoSuchAttributeException; import jdk.incubator.vector.ByteVector; import jdk.incubator.vector.VectorMask; import jdk.incubator.vector.VectorOperators; import jdk.incubator.vector.VectorSpecies; +import jdk.jfr.RecordingState; public class VectorizedEscapeScanner implements EscapeScanner { public static EscapeScanner.ScalarEscapeScanner FALLBACK = new EscapeScanner.ScalarEscapeScanner(StringEncoder.ESCAPE_TABLE); - // private VectorMask needsEscape = null; - // private int chunkStart = 0; - @Override - public boolean scan(State state) throws IOException { + public boolean scan(State _state) throws IOException { VectorSpecies species = ByteVector.SPECIES_PREFERRED; - // if (needsEscape != null) { - // if (needsEscape.anyTrue()) { - // int firstEscapeIndex = needsEscape.firstTrue(); - // needsEscape = needsEscape.andNot(VectorMask.fromLong(species, 1L << firstEscapeIndex)); - // state.pos = chunkStart + firstEscapeIndex; - // return true; - // } else { - // needsEscape = null; - // } - // } + VectorizedState state = (VectorizedState) _state; + + if (state.hasMatches) { + if (state.mask > 0) { + return nextMatch(state); + } else { + state.hasMatches = false; + state.pos = state.chunkStart + species.length(); + } + } while ((state.ptr + state.pos) + species.length() < state.len) { ByteVector chunk = ByteVector.fromArray(species, state.ptrBytes, state.ptr + state.pos); @@ -41,12 +40,11 @@ public boolean scan(State state) throws IOException { VectorMask needsEscape = chunk.eq(ByteVector.broadcast(species, '\\')).or(tooLowOrDblQuote).and(negative); if (needsEscape.anyTrue()) { - // chunkStart = state.ptr + state.pos; - int firstEscapeIndex = needsEscape.firstTrue(); - // Clear the bit at firstEscapeIndex to avoid scanning the same byte again - // needsEscape = 
needsEscape.andNot(VectorMask.fromLong(species, 1L << firstEscapeIndex)); - state.pos += firstEscapeIndex; - return true; + state.hasMatches = true; + state.chunkStart = state.ptr + state.pos; + state.mask = needsEscape.toLong(); + + return nextMatch(state); } state.pos += species.length(); @@ -54,4 +52,29 @@ public boolean scan(State state) throws IOException { return FALLBACK.scan(state); } + + private boolean nextMatch(VectorizedState state) { + int index = Long.numberOfTrailingZeros(state.mask); + state.mask &= (state.mask - 1); + state.pos = state.chunkStart + index; + return true; + } + + @Override + public EscapeScanner.State createState(byte[] ptrBytes, int ptr, int len, int beg) { + VectorizedState state = new VectorizedState(); + state.ptrBytes = ptrBytes; + state.ptr = ptr; + state.len = len; + state.beg = beg; + state.pos = 0; + return state; + } + + private static class VectorizedState extends State { + private long mask; + private int chunkStart = 0; + // private int lastMatchingIndex; + private boolean hasMatches; + } } From 8e42c0fd29d362661d2d63ef7b0c2f850172ec9b Mon Sep 17 00:00:00 2001 From: Scott Myron Date: Sat, 5 Jul 2025 12:16:06 -0500 Subject: [PATCH 3/9] Cleanups. 
--- java/src/json/ext/VectorizedEscapeScanner.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/java/src/json/ext/VectorizedEscapeScanner.java b/java/src/json/ext/VectorizedEscapeScanner.java index 5bd43b52..3f4f6196 100644 --- a/java/src/json/ext/VectorizedEscapeScanner.java +++ b/java/src/json/ext/VectorizedEscapeScanner.java @@ -1,13 +1,11 @@ package json.ext; import java.io.IOException; -import javax.naming.directory.NoSuchAttributeException; import jdk.incubator.vector.ByteVector; import jdk.incubator.vector.VectorMask; import jdk.incubator.vector.VectorOperators; import jdk.incubator.vector.VectorSpecies; -import jdk.jfr.RecordingState; public class VectorizedEscapeScanner implements EscapeScanner { public static EscapeScanner.ScalarEscapeScanner FALLBACK = new EscapeScanner.ScalarEscapeScanner(StringEncoder.ESCAPE_TABLE); @@ -29,11 +27,10 @@ public boolean scan(State _state) throws IOException { while ((state.ptr + state.pos) + species.length() < state.len) { ByteVector chunk = ByteVector.fromArray(species, state.ptrBytes, state.ptr + state.pos); - ByteVector zero = ByteVector.broadcast(species, 0); // bytes are unsigned in java, so we need to check for negative values // to determine if we have a byte that is less than 0 (>= 128). 
- VectorMask negative = zero.lt(chunk); + VectorMask negative = ByteVector.zero(species).lt(chunk); VectorMask tooLowOrDblQuote = chunk.lanewise(VectorOperators.XOR, ByteVector.broadcast(species, 2)) .lt(ByteVector.broadcast(species, 33)); From 700826b434d78eee3ffd0a33a0cd511401ffc6c2 Mon Sep 17 00:00:00 2001 From: Scott Myron Date: Sun, 6 Jul 2025 20:53:03 -0500 Subject: [PATCH 4/9] WIP --- Rakefile | 4 +++- java/src/json/ext/EscapeScanner.java | 21 ++++++++----------- java/src/json/ext/StringEncoder.java | 4 ---- .../VectorizedEscapeScanner.java | 2 +- 4 files changed, 13 insertions(+), 18 deletions(-) rename java/src/json/ext/{ => vectorized}/VectorizedEscapeScanner.java (98%) diff --git a/Rakefile b/Rakefile index 714f2836..cea15744 100644 --- a/Rakefile +++ b/Rakefile @@ -16,6 +16,7 @@ JAVA_DIR = "java/src/json/ext" JAVA_RAGEL_PATH = "#{JAVA_DIR}/ParserConfig.rl" JAVA_PARSER_SRC = "#{JAVA_DIR}/ParserConfig.java" JAVA_SOURCES = FileList["#{JAVA_DIR}/*.java"] +JAVA_VEC_SOURCES = FileList["#{JAVA_DIR}/vectorized/*.java"] JAVA_CLASSES = [] JRUBY_PARSER_JAR = File.expand_path("lib/json/ext/parser.jar") JRUBY_GENERATOR_JAR = File.expand_path("lib/json/ext/generator.jar") @@ -68,7 +69,8 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby' classpath = (Dir['java/lib/*.jar'] << 'java/src' << JRUBY_JAR) * ':' obj = src.sub(/\.java\Z/, '.class') file obj => src do - sh 'javac', '--enable-preview', '--add-modules', 'jdk.incubator.vector', '-classpath', classpath, '-source', '21', '-target', '21', src + sh 'javac', '-classpath', classpath, '-source', '1.8', '-target', '1.8', src + # '--enable-preview', '--add-modules', 'jdk.incubator.vector', end JAVA_CLASSES << obj end diff --git a/java/src/json/ext/EscapeScanner.java b/java/src/json/ext/EscapeScanner.java index 7b407ec0..3f4d1390 100644 --- a/java/src/json/ext/EscapeScanner.java +++ b/java/src/json/ext/EscapeScanner.java @@ -15,22 +15,19 @@ static class State { } static class VectorSupport { + private static String 
VECTORIZED_ESCAPE_SCANNER_CLASS = "json.ext.vectorized.VectorizedEscapeScanner"; static final EscapeScanner VECTORIZED_ESCAPE_SCANNER; static { - Optional vectorModule = ModuleLayer.boot().findModule("jdk.incubator.vector"); EscapeScanner scanner = null; - if (vectorModule.isPresent()) { - try { - Class vectorEscapeScannerClass = EscapeScanner.class.getClassLoader().loadClass("json.ext.VectorizedEscapeScanner"); - Constructor vectorizedEscapeScannerConstructor = vectorEscapeScannerClass.getDeclaredConstructor(); - scanner = (EscapeScanner) vectorizedEscapeScannerConstructor.newInstance(); - } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException e) { - // Fallback to the ScalarEscapeScanner if we cannot load the VectorizedEscapeScanner. - System.err.println("Failed to load VectorizedEscapeScanner, falling back to ScalarEscapeScanner: " + e.getMessage()); - scanner = null; - } - + try { + Class vectorEscapeScannerClass = EscapeScanner.class.getClassLoader().loadClass(VECTORIZED_ESCAPE_SCANNER_CLASS); + Constructor vectorizedEscapeScannerConstructor = vectorEscapeScannerClass.getDeclaredConstructor(); + scanner = (EscapeScanner) vectorizedEscapeScannerConstructor.newInstance(); + } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException e) { + // Fallback to the ScalarEscapeScanner if we cannot load the VectorizedEscapeScanner. 
+ System.err.println("Failed to load VectorizedEscapeScanner, falling back to ScalarEscapeScanner: " + e.getMessage()); + scanner = null; } VECTORIZED_ESCAPE_SCANNER = scanner; } diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java index 496a9007..c188d897 100644 --- a/java/src/json/ext/StringEncoder.java +++ b/java/src/json/ext/StringEncoder.java @@ -21,10 +21,6 @@ import org.jruby.util.ByteList; import org.jruby.util.StringSupport; -import jdk.incubator.vector.ByteVector; -import jdk.incubator.vector.VectorSpecies; -import json.ext.VectorizedEscapeScanner; - /** * An encoder that reads from the given source and outputs its representation * to another ByteList. The source string is fully checked for UTF-8 validity, diff --git a/java/src/json/ext/VectorizedEscapeScanner.java b/java/src/json/ext/vectorized/VectorizedEscapeScanner.java similarity index 98% rename from java/src/json/ext/VectorizedEscapeScanner.java rename to java/src/json/ext/vectorized/VectorizedEscapeScanner.java index 3f4f6196..2839e760 100644 --- a/java/src/json/ext/VectorizedEscapeScanner.java +++ b/java/src/json/ext/vectorized/VectorizedEscapeScanner.java @@ -1,4 +1,4 @@ -package json.ext; +package json.ext.vectorized; import java.io.IOException; From 51264df6db5f5b7f27aa2e4340ede602ab59e6b1 Mon Sep 17 00:00:00 2001 From: Scott Myron Date: Mon, 7 Jul 2025 08:34:29 -0500 Subject: [PATCH 5/9] Skip the vectorized code if it cannot be compiled. 
--- Rakefile | 24 +++++++++++++++---- java/src/json/ext/EscapeScanner.java | 1 - .../VectorizedEscapeScanner.java | 4 ++-- 3 files changed, 21 insertions(+), 8 deletions(-) rename java/src/json/ext/{vectorized => }/VectorizedEscapeScanner.java (96%) diff --git a/Rakefile b/Rakefile index cea15744..5ed2bdc1 100644 --- a/Rakefile +++ b/Rakefile @@ -15,8 +15,8 @@ end rescue nil JAVA_DIR = "java/src/json/ext" JAVA_RAGEL_PATH = "#{JAVA_DIR}/ParserConfig.rl" JAVA_PARSER_SRC = "#{JAVA_DIR}/ParserConfig.java" -JAVA_SOURCES = FileList["#{JAVA_DIR}/*.java"] -JAVA_VEC_SOURCES = FileList["#{JAVA_DIR}/vectorized/*.java"] +JAVA_SOURCES = FileList["#{JAVA_DIR}/*.java"].exclude("#{JAVA_DIR}/Vectorized*.java") +JAVA_VEC_SOURCES = FileList["#{JAVA_DIR}/Vectorized*.java"] JAVA_CLASSES = [] JRUBY_PARSER_JAR = File.expand_path("lib/json/ext/parser.jar") JRUBY_GENERATOR_JAR = File.expand_path("lib/json/ext/generator.jar") @@ -65,12 +65,26 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby' JRUBY_JAR = File.join(CONFIG["libdir"], "jruby.jar") if File.exist?(JRUBY_JAR) + classpath = (Dir['java/lib/*.jar'] << 'java/src' << JRUBY_JAR) * ':' JAVA_SOURCES.each do |src| - classpath = (Dir['java/lib/*.jar'] << 'java/src' << JRUBY_JAR) * ':' obj = src.sub(/\.java\Z/, '.class') file obj => src do - sh 'javac', '-classpath', classpath, '-source', '1.8', '-target', '1.8', src - # '--enable-preview', '--add-modules', 'jdk.incubator.vector', + sh 'javac', '-classpath', classpath, '-source', '1.8', '-target', '1.8', src + # '--enable-preview', + end + JAVA_CLASSES << obj + end + + JAVA_VEC_SOURCES.each do |src| + obj = src.sub(/\.java\Z/, '.class') + file obj => src do + sh 'javac', '--add-modules', 'jdk.incubator.vector', '-classpath', classpath, '--release', '16', src do |success, status| + if success + puts "*** 'jdk.incubator.vector' support enabled ***" + else + puts "*** 'jdk.incubator.vector' support disabled ***" + end + end end JAVA_CLASSES << obj end diff --git 
a/java/src/json/ext/EscapeScanner.java b/java/src/json/ext/EscapeScanner.java index 3f4d1390..ee487079 100644 --- a/java/src/json/ext/EscapeScanner.java +++ b/java/src/json/ext/EscapeScanner.java @@ -2,7 +2,6 @@ import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; -import java.util.Optional; interface EscapeScanner { static class State { diff --git a/java/src/json/ext/vectorized/VectorizedEscapeScanner.java b/java/src/json/ext/VectorizedEscapeScanner.java similarity index 96% rename from java/src/json/ext/vectorized/VectorizedEscapeScanner.java rename to java/src/json/ext/VectorizedEscapeScanner.java index 2839e760..5cb64a7c 100644 --- a/java/src/json/ext/vectorized/VectorizedEscapeScanner.java +++ b/java/src/json/ext/VectorizedEscapeScanner.java @@ -1,4 +1,4 @@ -package json.ext.vectorized; +package json.ext; import java.io.IOException; @@ -7,7 +7,7 @@ import jdk.incubator.vector.VectorOperators; import jdk.incubator.vector.VectorSpecies; -public class VectorizedEscapeScanner implements EscapeScanner { +class VectorizedEscapeScanner implements EscapeScanner { public static EscapeScanner.ScalarEscapeScanner FALLBACK = new EscapeScanner.ScalarEscapeScanner(StringEncoder.ESCAPE_TABLE); @Override From 53a5a88344fb70573b1996ddae62f3586c4e9866 Mon Sep 17 00:00:00 2001 From: Scott Myron Date: Mon, 7 Jul 2025 08:48:22 -0500 Subject: [PATCH 6/9] Added a system property to enable vectorized scanning and fix a bug after refactoring the vectorized class. 
--- java/src/json/ext/EscapeScanner.java | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/java/src/json/ext/EscapeScanner.java b/java/src/json/ext/EscapeScanner.java index ee487079..a1ce0c4a 100644 --- a/java/src/json/ext/EscapeScanner.java +++ b/java/src/json/ext/EscapeScanner.java @@ -14,19 +14,27 @@ static class State { } static class VectorSupport { - private static String VECTORIZED_ESCAPE_SCANNER_CLASS = "json.ext.vectorized.VectorizedEscapeScanner"; + private static String VECTORIZED_ESCAPE_SCANNER_CLASS = "json.ext.VectorizedEscapeScanner"; + private static String VECTORIZED_SCANNER_PROP = "json.enableVectorizedEscapeScanner"; + private static String VECTORIZED_SCANNER_DEFAULT = "false"; static final EscapeScanner VECTORIZED_ESCAPE_SCANNER; static { EscapeScanner scanner = null; - try { - Class vectorEscapeScannerClass = EscapeScanner.class.getClassLoader().loadClass(VECTORIZED_ESCAPE_SCANNER_CLASS); - Constructor vectorizedEscapeScannerConstructor = vectorEscapeScannerClass.getDeclaredConstructor(); - scanner = (EscapeScanner) vectorizedEscapeScannerConstructor.newInstance(); - } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException e) { - // Fallback to the ScalarEscapeScanner if we cannot load the VectorizedEscapeScanner. 
- System.err.println("Failed to load VectorizedEscapeScanner, falling back to ScalarEscapeScanner: " + e.getMessage()); - scanner = null; + String enableVectorizedScanner = System.getProperty(VECTORIZED_SCANNER_PROP, VECTORIZED_SCANNER_DEFAULT); + if ("true".equalsIgnoreCase(enableVectorizedScanner) || "1".equalsIgnoreCase(enableVectorizedScanner)) { + try { + Class vectorEscapeScannerClass = EscapeScanner.class.getClassLoader().loadClass(VECTORIZED_ESCAPE_SCANNER_CLASS); + Constructor vectorizedEscapeScannerConstructor = vectorEscapeScannerClass.getDeclaredConstructor(); + scanner = (EscapeScanner) vectorizedEscapeScannerConstructor.newInstance(); + } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException e) { + // Fallback to the ScalarEscapeScanner if we cannot load the VectorizedEscapeScanner. + System.err.println("Failed to load VectorizedEscapeScanner, falling back to ScalarEscapeScanner:"); + e.printStackTrace(); + scanner = null; + } + } else { + System.err.println("VectorizedEscapeScanner disabled."); } VECTORIZED_ESCAPE_SCANNER = scanner; } From 3e89dd780481db63794ac100319861bf354d5753 Mon Sep 17 00:00:00 2001 From: Scott Myron Date: Mon, 7 Jul 2025 09:34:45 -0500 Subject: [PATCH 7/9] Cleanups. 
--- java/src/json/ext/StringEncoder.java | 45 ------------------- .../src/json/ext/VectorizedEscapeScanner.java | 4 +- 2 files changed, 2 insertions(+), 47 deletions(-) diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java index c188d897..8a4cef06 100644 --- a/java/src/json/ext/StringEncoder.java +++ b/java/src/json/ext/StringEncoder.java @@ -224,13 +224,6 @@ boolean searchEscape(EscapeScanner.State state) throws IOException { } void encodeBasic(ByteList src) throws IOException { - // EscapeScanner.State state = new EscapeScanner.State(); - // state.ptrBytes = src.unsafeBytes(); - // state.ptr = src.begin(); - // state.len = src.realSize(); - // state.beg = 0; - // state.pos = 0; - byte[] hexdig = HEX; byte[] scratch = aux; @@ -307,44 +300,6 @@ void encode(ByteList src) throws IOException { } } - // while (state.pos < state.len) { - // int ch = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos]); - // int ch_len = escapeTable[ch]; - // /* JSON encoding */ - - // if (ch_len > 0) { - // switch (ch_len) { - // case 9: { - // state.beg = state.pos = flushPos(state.pos, state.beg, state.ptrBytes, state.ptr, 1); - // escapeAscii(ch, scratch, hexdig); - // break; - // } - // case 11: { - // int b2 = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos + 1]); - // if (b2 == 0x80) { - // int b3 = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos + 2]); - // if (b3 == 0xA8) { - // state.beg = state.pos = flushPos(state.pos, state.beg, state.ptrBytes, state.ptr, 3); - // append(BACKSLASH_U2028, 0, 6); - // break; - // } else if (b3 == 0xA9) { - // state.beg = state.pos = flushPos(state.pos, state.beg, state.ptrBytes, state.ptr, 3); - // append(BACKSLASH_U2029, 0, 6); - // break; - // } - // } - // ch_len = 3; - // // fallthrough - // } - // default: - // state.pos += ch_len; - // break; - // } - // } else { - // state.pos++; - // } - // } - if (state.beg < state.len) { append(state.ptrBytes, state.ptr + state.beg, state.len - 
state.beg); } diff --git a/java/src/json/ext/VectorizedEscapeScanner.java b/java/src/json/ext/VectorizedEscapeScanner.java index 5cb64a7c..2228139e 100644 --- a/java/src/json/ext/VectorizedEscapeScanner.java +++ b/java/src/json/ext/VectorizedEscapeScanner.java @@ -30,12 +30,12 @@ public boolean scan(State _state) throws IOException { // bytes are unsigned in java, so we need to check for negative values // to determine if we have a byte that is less than 0 (>= 128). - VectorMask negative = ByteVector.zero(species).lt(chunk); + VectorMask nonNegative = ByteVector.zero(species).lt(chunk); VectorMask tooLowOrDblQuote = chunk.lanewise(VectorOperators.XOR, ByteVector.broadcast(species, 2)) .lt(ByteVector.broadcast(species, 33)); - VectorMask needsEscape = chunk.eq(ByteVector.broadcast(species, '\\')).or(tooLowOrDblQuote).and(negative); + VectorMask needsEscape = chunk.eq(ByteVector.broadcast(species, '\\')).or(tooLowOrDblQuote).and(nonNegative); if (needsEscape.anyTrue()) { state.hasMatches = true; state.chunkStart = state.ptr + state.pos; From 22d2c761bb29726718625953a4077e0ff9716617 Mon Sep 17 00:00:00 2001 From: Scott Myron Date: Wed, 9 Jul 2025 20:42:42 -0500 Subject: [PATCH 8/9] Add a BasicScanner that doesn't use a lookup table. 
--- java/src/json/ext/EscapeScanner.java | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/java/src/json/ext/EscapeScanner.java b/java/src/json/ext/EscapeScanner.java index a1ce0c4a..9d472cf0 100644 --- a/java/src/json/ext/EscapeScanner.java +++ b/java/src/json/ext/EscapeScanner.java @@ -64,6 +64,20 @@ public static EscapeScanner create(byte[] escapeTable) { return new ScalarEscapeScanner(escapeTable); } + public static class BasicScanner implements EscapeScanner { + @Override + public boolean scan(EscapeScanner.State state) throws java.io.IOException { + while (state.pos < state.len) { + state.ch = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos]); + if (state.ch >= 0 && (state.ch < ' ' || state.ch == '\"' || state.ch == '\\')) { + return true; + } + state.pos++; + } + return false; + } + } + public static class ScalarEscapeScanner implements EscapeScanner { private final byte[] escapeTable; From 4895a35ccc8186a39a89f2fbcc82959307124713 Mon Sep 17 00:00:00 2001 From: Scott Myron Date: Mon, 14 Jul 2025 22:05:23 -0500 Subject: [PATCH 9/9] Fixed a bug parsing UTF8 encoded strings. Additionally some refactoring. 
--- .../src/json/ext/VectorizedEscapeScanner.java | 76 ++++++++++++------- 1 file changed, 49 insertions(+), 27 deletions(-) diff --git a/java/src/json/ext/VectorizedEscapeScanner.java b/java/src/json/ext/VectorizedEscapeScanner.java index 2228139e..e5a8f003 100644 --- a/java/src/json/ext/VectorizedEscapeScanner.java +++ b/java/src/json/ext/VectorizedEscapeScanner.java @@ -8,70 +8,92 @@ import jdk.incubator.vector.VectorSpecies; class VectorizedEscapeScanner implements EscapeScanner { - public static EscapeScanner.ScalarEscapeScanner FALLBACK = new EscapeScanner.ScalarEscapeScanner(StringEncoder.ESCAPE_TABLE); + private static final VectorSpecies SP = ByteVector.SPECIES_PREFERRED; + private static final ByteVector ZERO = ByteVector.zero(SP); + private static final ByteVector TWO = ByteVector.broadcast(SP, 2); + private static final ByteVector THIRTY_THREE = ByteVector.broadcast(SP, 33); + private static final ByteVector BACKSLASH = ByteVector.broadcast(SP, '\\'); @Override - public boolean scan(State _state) throws IOException { - VectorSpecies species = ByteVector.SPECIES_PREFERRED; - - VectorizedState state = (VectorizedState) _state; + public boolean scan(State _st) throws IOException { + VectorizedState state = (VectorizedState) _st; if (state.hasMatches) { if (state.mask > 0) { - return nextMatch(state); + // nextMatch inlined + int index = Long.numberOfTrailingZeros(state.mask); + state.mask &= (state.mask - 1); + state.pos = state.chunkStart + index; + state.ch = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos]); + return true; } else { state.hasMatches = false; - state.pos = state.chunkStart + species.length(); + state.pos = state.chunkStart + state.chunkLength; } } - while ((state.ptr + state.pos) + species.length() < state.len) { - ByteVector chunk = ByteVector.fromArray(species, state.ptrBytes, state.ptr + state.pos); + while (((state.ptr + state.pos) + SP.length() < state.len)) { + ByteVector chunk = ByteVector.fromArray(SP, state.ptrBytes, 
state.ptr + state.pos); + state.chunkLength = SP.length(); // bytes are unsigned in java, so we need to check for negative values // to determine if we have a byte that is less than 0 (>= 128). - VectorMask nonNegative = ByteVector.zero(species).lt(chunk); - - VectorMask tooLowOrDblQuote = chunk.lanewise(VectorOperators.XOR, ByteVector.broadcast(species, 2)) - .lt(ByteVector.broadcast(species, 33)); - - VectorMask needsEscape = chunk.eq(ByteVector.broadcast(species, '\\')).or(tooLowOrDblQuote).and(nonNegative); + VectorMask negative = chunk.lt(ZERO); + VectorMask tooLowOrDblQuote = chunk.lanewise(VectorOperators.XOR, TWO).lt(THIRTY_THREE).andNot(negative); + VectorMask needsEscape = chunk.eq(BACKSLASH).or(tooLowOrDblQuote); if (needsEscape.anyTrue()) { state.hasMatches = true; state.chunkStart = state.ptr + state.pos; state.mask = needsEscape.toLong(); - return nextMatch(state); + // nextMatch - inlined + int index = Long.numberOfTrailingZeros(state.mask); + state.mask &= (state.mask - 1); + state.pos = state.chunkStart + index; + state.ch = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos]); + + return true; } - state.pos += species.length(); + state.pos += SP.length(); } - return FALLBACK.scan(state); - } + int remaining = state.len - (state.ptr + state.pos); + for (int i=0; i 0) { + return true; + } + state.pos++; + } - private boolean nextMatch(VectorizedState state) { - int index = Long.numberOfTrailingZeros(state.mask); - state.mask &= (state.mask - 1); - state.pos = state.chunkStart + index; - return true; + return false; } + // private boolean nextMatch(VectorizedState state) { + // int index = Long.numberOfTrailingZeros(state.mask); + // state.mask &= (state.mask - 1); + // state.pos = state.chunkStart + index; + // state.ch = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos]); + // return true; + // } + @Override - public EscapeScanner.State createState(byte[] ptrBytes, int ptr, int len, int beg) { + public State createState(byte[] ptrBytes, 
int ptr, int len, int beg) { VectorizedState state = new VectorizedState(); state.ptrBytes = ptrBytes; state.ptr = ptr; state.len = len; state.beg = beg; - state.pos = 0; + state.pos = 0; // Start scanning from the beginning of the segment return state; } private static class VectorizedState extends State { private long mask; private int chunkStart = 0; - // private int lastMatchingIndex; private boolean hasMatches; + private int chunkLength; } }