OpenLiberty · ngmr · Feb 24, 2026 · Feb 2, 2026 · Feb 3, 2026 · Feb 3, 2026
diff --git a/...org/apache/yoko/orb/codecs/CharCodec.java → ...ava/org/apache/yoko/codecs/CharCodec.java b/...org/apache/yoko/orb/codecs/CharCodec.java → ...ava/org/apache/yoko/codecs/CharCodec.java
@@ -16,19 +16,14 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-package org.apache.yoko.orb.codecs;
+package org.apache.yoko.codecs;
 
 import org.apache.yoko.io.ReadBuffer;
 import org.apache.yoko.io.WriteBuffer;
 import org.apache.yoko.orb.OB.CodeSetInfo;
 import org.omg.CORBA.DATA_CONVERSION;
 
-import java.nio.charset.Charset;
-import java.nio.charset.IllegalCharsetNameException;
-import java.nio.charset.UnsupportedCharsetException;
-
-import static org.apache.yoko.orb.codecs.LatinCodec.getLatinCodec;
-import static org.apache.yoko.orb.codecs.Util.getUnicodeCodec;
+import static org.apache.yoko.codecs.LatinCodec.getLatinCodec;
 import static org.apache.yoko.util.MinorCodes.MinorUTF8Encoding;
 import static org.apache.yoko.util.MinorCodes.MinorUTF8Overflow;
 import static org.omg.CORBA.CompletionStatus.COMPLETED_MAYBE;
@@ -80,40 +75,31 @@
  * </p>
  */
 public interface CharCodec {
-    @FunctionalInterface interface CharReader { char readChar(ReadBuffer in); }
+
+    String name();
+
+    CodeSetInfo getCodeSetInfo();
 
     /**
-     * Get a char codec instance for the named Java charset.
-     *
-     * @param name the name of the Java charset for which a codec is required
-     * @return an instance of the appropriate char codec
-     * @throws IllegalCharsetNameException if the provided name is not a valid charset name
-     * @throws IllegalArgumentException if the provided name is null
-     * @throws UnsupportedCharsetException if the named charset is not supported
+     * Returns true iff the encoding always uses the same number of octets per char
      */
-    static CharCodec forName(String name) throws IllegalCharsetNameException, IllegalArgumentException, UnsupportedCharsetException {
-        // fastest result: directly named unicode codec
-        CharCodec result = getUnicodeCodec(name);
-        if (null != result) return result;
-        // next see if it is an alias for a unicode codec
-        Charset charset = Charset.forName(name);
-        result = getUnicodeCodec(charset.name());
-        if (null != result) return result;
-        // the only other codecs currently supported are the Latin ones
-        return getLatinCodec(charset);
-    }
+    default boolean isFixedWidth() { return true; }
 
-    static CharCodec forRegistryId(int id) throws UnsupportedCharsetException {
-        CodeSetInfo csi = CodeSetInfo.forRegistryId(id);
-        switch (csi) {
-            case UTF_16: return SimpleWcharCodec.UTF_16;
-            case UTF_8: return new Utf8Codec();
-
-        }
-        throw new UnsupportedCharsetException("Charset registry id = " + id);
-    }
+    /**
+     * Returns the number of octets per char iff {@link #isFixedWidth()} returns <code>true</code>
+     * @throws UnsupportedOperationException for non-fixed-width encodings
+     */
+    default int charSize() { return 1; }
+    /**
+     * Read the next char.
+     * @throws IndexOutOfBoundsException if the buffer does not contain enough bytes to read a single char
+     */
+    char readChar(ReadBuffer in);
 
-    String name();
+    /**
+     * Gives the number of octets needed to encode the specified char.
+     */
+    default int octetCount(char c) { return 1; }
 
     /**
      * Encodes a character to a buffer.
@@ -133,10 +119,6 @@ static CharCodec forRegistryId(int id) throws UnsupportedCharsetException {
      */
     void writeChar(char c, WriteBuffer out);
 
-    /** Read the next char */
-    char readChar(ReadBuffer in);
-
-
     /**
      * Check there is no unfinished character data.
      * This is only relevant for encodings that encode
@@ -155,6 +137,8 @@ default void assertNoBufferedCharData() throws DATA_CONVERSION {
     /** Check whether the last character was not a high surrogate. */
     default boolean writeFinished() { return true; }
 
-    /** Provides an identical object that can be used concurrently with this one */
-    default CharCodec getInstanceOrCopy() { return this; }
+    /**
+     * Provides an identical object that can be used concurrently with this one
+     */
+    default CharCodec duplicate() { return this; }
 }
diff --git a/yoko-core/src/main/java/org/apache/yoko/codecs/Codex.java b/yoko-core/src/main/java/org/apache/yoko/codecs/Codex.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2026 IBM Corporation and others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+package org.apache.yoko.codecs;
+
+import org.apache.yoko.orb.OB.CodeSetInfo;
+
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+import java.util.Optional;
+
+import static org.apache.yoko.codecs.LatinCodec.getLatinCodec;
+import static org.apache.yoko.codecs.SimpleWcharCodec.UTF_16;
+import static org.apache.yoko.codecs.Util.getUnicodeCharCodec;
+
+public enum Codex {
+    ;
+
+    public static CharCodec getCollocatedCharCodec() { return SimpleWcharCodec.COLLOCATED; }
+
+    public static CharCodec getDefaultCharCodec() { return SimpleCharCodec.ISO_LATIN_1; }
+
+    /**
+     * Get a char codec instance for the named Java charset.
+     *
+     * @param charsetName the name of the Java charset for which a codec is required
+     * @return an instance of the appropriate char codec
+     * @throws IllegalCharsetNameException if the provided charsetName is not a valid charset name
+     * @throws IllegalArgumentException if the provided charsetName is null
+     * @throws UnsupportedCharsetException if the named charset is not supported
+     */
+    public static CharCodec getCharCodec(String charsetName) throws IllegalCharsetNameException, IllegalArgumentException, UnsupportedCharsetException {
+        // fastest result: directly named unicode codec
+        CharCodec result = getUnicodeCharCodec(charsetName);
+        if (null != result) return result;
+        // next see if it is an alias for a unicode codec
+        Charset charset = Charset.forName(charsetName);
+        result = getUnicodeCharCodec(charset.name());
+        if (null != result) return result;
+        // the only other codecs currently supported are the Latin ones
+        return getLatinCodec(charset);
+    }
+
+    public static CharCodec getCharCodec(int id) throws UnsupportedCharsetException {
+        CodeSetInfo csi = CodeSetInfo.forRegistryId(id);
+        if (null == csi) throw new UnsupportedCharsetException(String.format("Unknown registry id: 0x%08x", id));
+        switch (csi) {
+            case UTF_8: return new Utf8Codec();
+            case ISO_LATIN_1: return SimpleCharCodec.ISO_LATIN_1;
+            default: return LatinCodec.getLatinCodec(csi); // throws if unknown
+        }
+    }
+
+    public static WcharCodec getCollocatedWcharCodec() { return SimpleWcharCodec.COLLOCATED; }
+
+    public static WcharCodec getDefaultWcharCodec() { return UTF_16; }
+
+    public static WcharCodec getUnspecifiedWcharCodec() { return SimpleWcharCodec.UNSPECIFIED; }
+
+    public static WcharCodec getWcharCodec(String charsetName) {
+        if (charsetName == null) throw new NullPointerException();
+        if ("UTF-16".equalsIgnoreCase(charsetName)) return UTF_16;
+        if ("UTF-16".equalsIgnoreCase(Charset.forName(charsetName).name())) return UTF_16;
+        throw new UnsupportedCharsetException(charsetName + " not supported for wchar");
+    }
+
+    public static WcharCodec getWcharCodec(int twcsId) {
+        if (CodeSetInfo.UTF_16.id == twcsId) return UTF_16;
+        String message = Optional.ofNullable(CodeSetInfo.forRegistryId(twcsId))
+                .map(info -> String.format("Charset %s unsupported for wchar", info.name()))
+                .orElse(String.format("Unknown registry id 0x%08x unsupported for wchar", twcsId));
+        throw new UnsupportedCharsetException(message);
+    }
+}
diff --git a/...rg/apache/yoko/orb/codecs/LatinCodec.java → ...va/org/apache/yoko/codecs/LatinCodec.java b/...rg/apache/yoko/orb/codecs/LatinCodec.java → ...va/org/apache/yoko/codecs/LatinCodec.java
@@ -15,7 +15,7 @@
  *
  * SPDX-License-Identifier: Apache-2.0
  */
-package org.apache.yoko.orb.codecs;
+package org.apache.yoko.codecs;
 
 import org.apache.yoko.io.ReadBuffer;
 import org.apache.yoko.io.WriteBuffer;
@@ -32,8 +32,16 @@
 import static java.nio.ByteBuffer.allocate;
 import static java.util.Collections.unmodifiableMap;
 import static java.util.stream.IntStream.range;
-import static org.apache.yoko.orb.codecs.Util.ASCII_REPLACEMENT_BYTE;
-import static org.apache.yoko.orb.codecs.Util.UNICODE_REPLACEMENT_CHAR;
+import static org.apache.yoko.codecs.Util.ASCII_REPLACEMENT_BYTE;
+import static org.apache.yoko.codecs.Util.UNICODE_REPLACEMENT_CHAR;
+import static org.apache.yoko.orb.OB.CodeSetInfo.ISO_8859_5;
+import static org.apache.yoko.orb.OB.CodeSetInfo.ISO_8859_6;
+import static org.apache.yoko.orb.OB.CodeSetInfo.ISO_8859_7;
+import static org.apache.yoko.orb.OB.CodeSetInfo.ISO_8859_8;
+import static org.apache.yoko.orb.OB.CodeSetInfo.ISO_8859_9;
+import static org.apache.yoko.orb.OB.CodeSetInfo.ISO_LATIN_2;
+import static org.apache.yoko.orb.OB.CodeSetInfo.ISO_LATIN_3;
+import static org.apache.yoko.orb.OB.CodeSetInfo.ISO_LATIN_4;
 import static org.apache.yoko.util.Collectors.neverCombine;
 
 /**
@@ -43,28 +51,28 @@ class LatinCodec implements CharCodec {
     static LatinCodec getLatinCodec(Charset charset) {
         if (!charset.canEncode()) throw new UnsupportedCharsetException(charset.name());
         switch (charset.name()) {
-            case ISO_8859_2.NAME: return ISO_8859_2.INSTANCE;
-            case ISO_8859_3.NAME: return ISO_8859_3.INSTANCE;
-            case ISO_8859_4.NAME: return ISO_8859_4.INSTANCE;
-            case ISO_8859_5.NAME: return ISO_8859_5.INSTANCE;
-            case ISO_8859_6.NAME: return ISO_8859_6.INSTANCE;
-            case ISO_8859_7.NAME: return ISO_8859_7.INSTANCE;
-            case ISO_8859_8.NAME: return ISO_8859_8.INSTANCE;
-            case ISO_8859_9.NAME: return ISO_8859_9.INSTANCE;
+            case "ISO-8859-2": return Iso8859_2.INSTANCE;
+            case "ISO-8859-3": return Iso8859_3.INSTANCE;
+            case "ISO-8859-4": return Iso8859_4.INSTANCE;
+            case "ISO-8859-5": return Iso8859_5.INSTANCE;
+            case "ISO-8859-6": return Iso8859_6.INSTANCE;
+            case "ISO-8859-7": return Iso8859_7.INSTANCE;
+            case "ISO-8859-8": return Iso8859_8.INSTANCE;
+            case "ISO-8859-9": return Iso8859_9.INSTANCE;
             default: throw new UnsupportedCharsetException(charset.name());
         }
     }
 
     static LatinCodec getLatinCodec(CodeSetInfo csi) {
         switch (csi) {
-            case ISO_LATIN_2: return ISO_8859_2.INSTANCE;
-            case ISO_LATIN_3: return ISO_8859_3.INSTANCE;
-            case ISO_LATIN_4: return ISO_8859_4.INSTANCE;
-            case ISO_8859_5: return ISO_8859_5.INSTANCE;
-            case ISO_8859_6: return ISO_8859_6.INSTANCE;
-            case ISO_8859_7: return ISO_8859_7.INSTANCE;
-            case ISO_8859_8: return ISO_8859_8.INSTANCE;
-            case ISO_8859_9: return ISO_8859_9.INSTANCE;
+            case ISO_LATIN_2: return Iso8859_2.INSTANCE;
+            case ISO_LATIN_3: return Iso8859_3.INSTANCE;
+            case ISO_LATIN_4: return Iso8859_4.INSTANCE;
+            case ISO_8859_5: return Iso8859_5.INSTANCE;
+            case ISO_8859_6: return Iso8859_6.INSTANCE;
+            case ISO_8859_7: return Iso8859_7.INSTANCE;
+            case ISO_8859_8: return Iso8859_8.INSTANCE;
+            case ISO_8859_9: return Iso8859_9.INSTANCE;
         }
         throw new UnsupportedCharsetException(csi.name());
     }
@@ -73,22 +81,24 @@ static LatinCodec getLatinCodec(CodeSetInfo csi) {
     // (e.g. if only Latin-2 is used, the others are never created.)
     // N.B. NAME is a compile-time constant and gets inlined so using it does not drive class initialization
     // whereas dereferencing INSTANCE forces initialization.  (See JLS 12.4)
-    private interface ISO_8859_2 { String NAME = "ISO-8859-2"; LatinCodec INSTANCE = new LatinCodec(NAME); }
-    private interface ISO_8859_3 { String NAME = "ISO-8859-3"; LatinCodec INSTANCE = new LatinCodec(NAME); }
-    private interface ISO_8859_4 { String NAME = "ISO-8859-4"; LatinCodec INSTANCE = new LatinCodec(NAME); }
-    private interface ISO_8859_5 { String NAME = "ISO-8859-5"; LatinCodec INSTANCE = new LatinCodec(NAME); }
-    private interface ISO_8859_6 { String NAME = "ISO-8859-6"; LatinCodec INSTANCE = new LatinCodec(NAME); }
-    private interface ISO_8859_7 { String NAME = "ISO-8859-7"; LatinCodec INSTANCE = new LatinCodec(NAME); }
-    private interface ISO_8859_8 { String NAME = "ISO-8859-8"; LatinCodec INSTANCE = new LatinCodec(NAME); }
-    private interface ISO_8859_9 { String NAME = "ISO-8859-9"; LatinCodec INSTANCE = new LatinCodec(NAME); }
+    private interface Iso8859_2 { LatinCodec INSTANCE = new LatinCodec("ISO-8859-2", ISO_LATIN_2); }
+    private interface Iso8859_3 { LatinCodec INSTANCE = new LatinCodec("ISO-8859-3", ISO_LATIN_3); }
+    private interface Iso8859_4 { LatinCodec INSTANCE = new LatinCodec("ISO-8859-4", ISO_LATIN_4); }
+    private interface Iso8859_5 { LatinCodec INSTANCE = new LatinCodec("ISO-8859-5", ISO_8859_5); }
+    private interface Iso8859_6 { LatinCodec INSTANCE = new LatinCodec("ISO-8859-6", ISO_8859_6); }
+    private interface Iso8859_7 { LatinCodec INSTANCE = new LatinCodec("ISO-8859-7", ISO_8859_7); }
+    private interface Iso8859_8 { LatinCodec INSTANCE = new LatinCodec("ISO-8859-8", ISO_8859_8); }
+    private interface Iso8859_9 { LatinCodec INSTANCE = new LatinCodec("ISO-8859-9", ISO_8859_9); }
 
-    final String name;
-    final char[] decoderArray;
-    final Map<Character, Byte> encoderMap;
+    private final String name;
+    private final CodeSetInfo codeSetInfo;
+    private final char[] decoderArray;
+    private final Map<Character, Byte> encoderMap;
 
-    private LatinCodec(String name) {
+    private LatinCodec(String name, CodeSetInfo csi) {
         Charset cs  = Charset.forName(name);
         this.name = cs.name();
+        this.codeSetInfo = csi;
         ByteBuffer bytes = range(0, 256)
                 .collect(() -> allocate(256), (bb, b) -> bb.put(b, (byte) b), neverCombine());
         CharBuffer chars = cs.decode(bytes);
@@ -109,6 +119,9 @@ public char readChar(ReadBuffer in) {
     @Override
     public String name() { return name; }
 
+    @Override
+    public CodeSetInfo getCodeSetInfo() { return codeSetInfo; }
+
     @Override
     public boolean equals(Object o) {
         if (!(o instanceof LatinCodec)) return false;
@@ -120,4 +133,7 @@ public boolean equals(Object o) {
     public int hashCode() {
         return Objects.hashCode(name);
     }
+
+    @Override
+    public String toString() { return name; }
 }
diff --git a/...ache/yoko/orb/codecs/SimpleCharCodec.java → ...g/apache/yoko/codecs/SimpleCharCodec.java b/...ache/yoko/orb/codecs/SimpleCharCodec.java → ...g/apache/yoko/codecs/SimpleCharCodec.java
@@ -16,21 +16,24 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-package org.apache.yoko.orb.codecs;
+package org.apache.yoko.codecs;
 
 import org.apache.yoko.io.ReadBuffer;
 import org.apache.yoko.io.WriteBuffer;
+import org.apache.yoko.orb.OB.CodeSetInfo;
 
-import static org.apache.yoko.orb.codecs.Util.expect7bit;
-import static org.apache.yoko.orb.codecs.Util.require7bit;
-import static org.apache.yoko.orb.codecs.Util.require8bit;
+import static org.apache.yoko.codecs.Util.expect7bit;
+import static org.apache.yoko.codecs.Util.require7bit;
+import static org.apache.yoko.codecs.Util.require8bit;
 
 enum SimpleCharCodec implements CharCodec {
     US_ASCII {
+        public CodeSetInfo getCodeSetInfo() { return CodeSetInfo.ISO_646_IRV; }
         public char readChar(ReadBuffer in) { return expect7bit(in.readByteAsChar()); }
         public void writeChar(char c, WriteBuffer out) { out.writeByte(require7bit(c)); }
     },
     ISO_LATIN_1 {
+        public CodeSetInfo getCodeSetInfo() { return CodeSetInfo.ISO_LATIN_1; }
         public char readChar(ReadBuffer in) { return in.readByteAsChar(); } // no checking - a single-byte character can't be > 0xFF
         public void writeChar(char c, WriteBuffer out) { out.writeByte(require8bit(c)); }
     }