From 2919abaaa71986ff17d37376bb8136fcb4130623 Mon Sep 17 00:00:00 2001 From: Nikolay Antonov Date: Thu, 5 Feb 2026 12:31:18 +0500 Subject: [PATCH 1/6] HBASE: use shaded version of hbase-client --- ci/singlecluster/Dockerfile | 6 ++--- ci/singlecluster/README.HDP3.md | 2 +- server/build.gradle | 16 +++++++++---- server/gradle.properties | 2 +- server/pxf-hbase/build.gradle | 24 +++++++++++-------- .../plugins/hbase/HBaseDataFragmenter.java | 1 - .../utilities/HBaseDoubleComparator.java | 4 ++-- .../hbase/utilities/HBaseFloatComparator.java | 4 ++-- .../utilities/HBaseIntegerComparator.java | 4 ++-- 9 files changed, 36 insertions(+), 27 deletions(-) diff --git a/ci/singlecluster/Dockerfile b/ci/singlecluster/Dockerfile index 8e8c4621a..da8068e85 100644 --- a/ci/singlecluster/Dockerfile +++ b/ci/singlecluster/Dockerfile @@ -12,14 +12,14 @@ RUN sudo apt-get update && \ ENV HADOOP_VERSION=3.1.2 ENV HIVE_VERSION=3.1.3 ENV ZOOKEEPER_VERSION=3.5.9 -ENV HBASE_VERSION=2.0.6 +ENV HBASE_VERSION=2.2.7 ENV TEZ_VERSION=0.9.2 # checksums from archive.apache.org ENV HADOOP_SHA512="0e0ee817c89b3c4eb761eca7f16640742a83b0e99b6fda26c1bee2baabedad93aab86e252bf5f1e2381c6d464bc4003d10c7cc0f61b2062f4c59732ca24d1bd9" ENV HIVE_SHA256="0c9b6a6359a7341b6029cc9347435ee7b379f93846f779d710b13f795b54bb16" ENV ZOOKEEPER_SHA512="0e5a64713abc6f36d961dd61a06f681868171a9d9228366e512a01324806d263e05508029c94d8e18307811867cdc39d848e736c252bf56c461273ef74c66a45" -ENV HBASE_SHA512="a0e10904ecf7f059b77bc0ce704254046a978126db720cc7e55dc53b87097715da64b8391fe3cc94348bc432871ad8f29891dc8df1ea052eb628da0fdca97c93" +ENV HBASE_SHA512="6ba6d9d2131ada457f63b80f50682230b2589797e4187b2d450e692ab6cd40c68a979dbfb150f5ce48e6d41370f02238b38bebd7f01694c2e8d53937044f526f" ENV TEZ_SHA512="a2d94bd9fa778d42a8bac9d9da8e263e469ddfef93968b06434716554995f490231de5607541ac236e770aa0158b64250c38bc1cd57dbfa629fea705f2ffa2f5" # faster mirror: @@ -63,7 +63,7 @@ RUN mkdir -p $ZOOKEEPER_ROOT && \ RUN mkdir -p $HBASE_ROOT && \ curl -fSL 
"$HBASE_URL" -o hbase.tar.gz && \ echo "$HBASE_SHA512 hbase.tar.gz" | sha512sum -c && \ - tar xvf hbase.tar.gz -C $HBASE_ROOT --strip-components 1 --exclude="docs/*" && \ + tar xvf hbase.tar.gz -C $HBASE_ROOT --strip-components 1 --exclude="docs/*" --exclude="lib/*-tests.jar" && \ rm hbase.tar.gz RUN mkdir -p $TEZ_ROOT && \ diff --git a/ci/singlecluster/README.HDP3.md b/ci/singlecluster/README.HDP3.md index 16506de11..14c2b5310 100644 --- a/ci/singlecluster/README.HDP3.md +++ b/ci/singlecluster/README.HDP3.md @@ -7,7 +7,7 @@ It contains the following versions: - Hadoop 3.3.6 - Hive 3.1.3 - Zookeeper 3.5.9 -- HBase 2.0.6 +- HBase 2.2.7 - Tez 0.9.2 This version of Single cluster requires users to make some manual changes to the configuration files once the tarball has been unpacked (see Initialization steps below). diff --git a/server/build.gradle b/server/build.gradle index a1b6275c2..6a39ae0a3 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -138,12 +138,18 @@ configure(javaProjects) { } // HBase dependencies + dependency("org.apache.yetus:audience-annotations:0.13.0") dependencySet(group:"org.apache.hbase", version:"${hbaseVersion}") { entry("hbase-annotations") - entry("hbase-client") - entry("hbase-common") - entry("hbase-protocol") + entry("hbase-shaded-client") + entry("hbase-protocol-shaded") } + dependencySet(group:"org.apache.hbase.thirdparty", version:"4.1.12") { + entry("hbase-shaded-protobuf") + } + dependency("io.opentelemetry:opentelemetry-api:1.49.0") + dependency("io.opentelemetry:opentelemetry-context:1.49.0") + dependency("io.opentelemetry.semconv:opentelemetry-semconv:1.29.0-alpha") // Hive dependencies dependency("org.apache.hive:hive-storage-api:${hiveStorageApiVersion}") @@ -237,7 +243,7 @@ configure(javaProjects) { options.compilerArgs += [ "-g", "-Xlint:varargs", "-Xlint:cast", "-Xlint:classfile", "-Xlint:dep-ann", "-Xlint:divzero", "-Xlint:empty", "-Xlint:finally", "-Xlint:overrides", "-Xlint:path", "-Xlint:-processing", 
"-Xlint:static", - "-Xlint:try", "-Xlint:fallthrough", "-Xlint:deprecation", "-Xlint:unchecked", "-Xlint:-options", "-Werror" + "-Xlint:try", "-Xlint:fallthrough", "-Xlint:unchecked", "-Xlint:-options", "-Werror" ] } @@ -245,7 +251,7 @@ configure(javaProjects) { options.compilerArgs += [ "-g", "-Xlint:varargs", "-Xlint:cast", "-Xlint:classfile", "-Xlint:dep-ann", "-Xlint:divzero", "-Xlint:empty", "-Xlint:finally", "-Xlint:overrides", "-Xlint:path", "-Xlint:-processing", "-Xlint:static", - "-Xlint:try", "-Xlint:fallthrough", "-Xlint:deprecation", "-Xlint:unchecked", "-Xlint:-options", "-Werror" + "-Xlint:try", "-Xlint:fallthrough", "-Xlint:unchecked", "-Xlint:-options", "-Werror" ] } diff --git a/server/gradle.properties b/server/gradle.properties index 42da880a3..59d8fa9d1 100644 --- a/server/gradle.properties +++ b/server/gradle.properties @@ -21,7 +21,7 @@ license=ASL 2.0 hadoopVersion=2.10.2 hiveVersion=2.3.8 hiveStorageApiVersion=2.7.3 -hbaseVersion=1.3.2 +hbaseVersion=2.6.4-hadoop3 junitVersion=4.11 parquetVersion=1.12.3 awsJavaSdk=1.12.261 diff --git a/server/pxf-hbase/build.gradle b/server/pxf-hbase/build.gradle index 026d602b9..d82822016 100644 --- a/server/pxf-hbase/build.gradle +++ b/server/pxf-hbase/build.gradle @@ -17,20 +17,24 @@ dependencies { compileOnly("org.apache.hbase:hbase-annotations") /******************************* - * Implementation Dependencies + * Project Dependencies *******************************/ implementation(project(':pxf-api')) - implementation("com.google.protobuf:protobuf-java") implementation("commons-collections:commons-collections") - implementation("org.apache.hbase:hbase-client") { transitive = false } - implementation("org.apache.hbase:hbase-common") { transitive = false } - implementation("org.apache.hbase:hbase-protocol") { transitive = false } - implementation("org.apache.htrace:htrace-core") { transitive = false } - implementation("org.apache.zookeeper:zookeeper") { transitive = false } - 
implementation("io.netty:netty-common") { transitive = false } - implementation("io.netty:netty-transport") { transitive = false } - implementation("com.yammer.metrics:metrics-core") { transitive = false } + + /******************************* + * Hbase + *******************************/ + + implementation("org.apache.hbase:hbase-shaded-client") { transitive = false } + implementation("io.opentelemetry:opentelemetry-api") { transitive = false } + implementation("io.opentelemetry:opentelemetry-context") { transitive = false } + implementation("org.apache.yetus:audience-annotations") { transitive = false } + implementation("io.opentelemetry.semconv:opentelemetry-semconv") { transitive = false } + implementation("org.apache.hbase:hbase-annotations") { transitive = false } + implementation("org.apache.hbase:hbase-protocol-shaded") { transitive = false } + implementation("org.apache.hbase.thirdparty:hbase-shaded-protobuf") { transitive = false } implementation("org.springframework.boot:spring-boot-starter-log4j2") diff --git a/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/HBaseDataFragmenter.java b/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/HBaseDataFragmenter.java index 21d5d42d7..774fb9c22 100644 --- a/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/HBaseDataFragmenter.java +++ b/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/HBaseDataFragmenter.java @@ -81,7 +81,6 @@ public FragmentStats getFragmentStats() { public List getFragments() throws Exception { // check that Zookeeper and HBase master are available - HBaseAdmin.checkHBaseAvailable(configuration); connection = ConnectionFactory.createConnection(configuration); Admin hbaseAdmin = connection.getAdmin(); if (!HBaseUtilities.isTableAvailable(hbaseAdmin, context.getDataSource())) { diff --git a/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseDoubleComparator.java 
b/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseDoubleComparator.java index 3627ca972..d5193da43 100644 --- a/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseDoubleComparator.java +++ b/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseDoubleComparator.java @@ -19,8 +19,8 @@ * under the License. */ -import com.google.protobuf.ByteString; -import com.google.protobuf.InvalidProtocolBufferException; +import org.apache.hadoop.hbase.shaded.com.google.protobuf.ByteString; +import org.apache.hadoop.hbase.shaded.com.google.protobuf.InvalidProtocolBufferException; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.filter.ByteArrayComparable; import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos; diff --git a/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseFloatComparator.java b/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseFloatComparator.java index bf26b0033..e0f39bb02 100644 --- a/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseFloatComparator.java +++ b/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseFloatComparator.java @@ -19,8 +19,8 @@ * under the License. 
*/ -import com.google.protobuf.ByteString; -import com.google.protobuf.InvalidProtocolBufferException; +import org.apache.hadoop.hbase.shaded.com.google.protobuf.ByteString; +import org.apache.hadoop.hbase.shaded.com.google.protobuf.InvalidProtocolBufferException; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.filter.ByteArrayComparable; import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos; diff --git a/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseIntegerComparator.java b/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseIntegerComparator.java index ffc0a10eb..39b094e74 100644 --- a/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseIntegerComparator.java +++ b/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseIntegerComparator.java @@ -26,8 +26,8 @@ import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos; import org.apache.hadoop.hbase.util.Bytes; -import com.google.protobuf.ByteString; -import com.google.protobuf.InvalidProtocolBufferException; +import org.apache.hadoop.hbase.shaded.com.google.protobuf.ByteString; +import org.apache.hadoop.hbase.shaded.com.google.protobuf.InvalidProtocolBufferException; /** * This is a Filter comparator for HBase It is external to PXF HBase code. 
From 89cf563df59280608212a104957f255e5a5fde31 Mon Sep 17 00:00:00 2001 From: Nikolay Antonov Date: Sat, 7 Feb 2026 22:20:00 +0500 Subject: [PATCH 2/6] Experimental dependencies: * explicitly mention _all_ dependencies --- server/build.gradle | 21 ++++++++--- server/gradle.properties | 2 +- server/pxf-hbase/build.gradle | 36 ++++++++++++++----- .../utilities/HBaseDoubleComparator.java | 6 ++-- .../hbase/utilities/HBaseFloatComparator.java | 6 ++-- .../utilities/HBaseIntegerComparator.java | 6 ++-- 6 files changed, 54 insertions(+), 23 deletions(-) diff --git a/server/build.gradle b/server/build.gradle index 6a39ae0a3..e3371460a 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -85,11 +85,12 @@ configure(javaProjects) { dependency("com.univocity:univocity-parsers:2.9.1") dependency("com.yammer.metrics:metrics-core:2.2.0") dependency("com.zaxxer:HikariCP:3.4.5") - dependency("commons-codec:commons-codec:1.14") + dependency("commons-codec:commons-codec:1.15") dependency("commons-collections:commons-collections:3.2.2") dependency("commons-configuration:commons-configuration:1.10") dependency("commons-io:commons-io:2.7") dependency("commons-lang:commons-lang:2.6") + dependency("commons-lang:commons-lang3:3.18.0") dependency("commons-logging:commons-logging:1.1.3") dependency("io.airlift:aircompressor:0.27") dependency("javax.jdo:jdo-api:3.0.1") @@ -97,6 +98,7 @@ configure(javaProjects) { dependency("net.sf.opencsv:opencsv:2.3") dependency("org.antlr:antlr-runtime:3.5.2") dependency("org.apache.commons:commons-compress:1.20") + dependency("org.apache.commons:commons-crypto:1.1.0") dependency("org.apache.htrace:htrace-core:3.1.0-incubating") dependency("org.apache.htrace:htrace-core4:4.0.1-incubating") @@ -138,18 +140,29 @@ configure(javaProjects) { } // HBase dependencies - dependency("org.apache.yetus:audience-annotations:0.13.0") dependencySet(group:"org.apache.hbase", version:"${hbaseVersion}") { - entry("hbase-annotations") - entry("hbase-shaded-client") + 
entry("hbase-client") + entry("hbase-common") + entry("hbase-protocol") entry("hbase-protocol-shaded") + entry("hbase-logging") + entry("hbase-hadoop-compat") + entry("hbase-hadoop2-compat") + entry("hbase-metrics-api") + entry("hbase-metrics") } dependencySet(group:"org.apache.hbase.thirdparty", version:"4.1.12") { entry("hbase-shaded-protobuf") + entry("hbase-shaded-miscellaneous") + entry("hbase-shaded-gson") + entry("hbase-shaded-netty") + entry("hbase-unsafe") } + dependency("org.apache.yetus:audience-annotations:0.13.0") dependency("io.opentelemetry:opentelemetry-api:1.49.0") dependency("io.opentelemetry:opentelemetry-context:1.49.0") dependency("io.opentelemetry.semconv:opentelemetry-semconv:1.29.0-alpha") + dependency("io.dropwizard.metrics:metrics-core:3.2.6") // Hive dependencies dependency("org.apache.hive:hive-storage-api:${hiveStorageApiVersion}") diff --git a/server/gradle.properties b/server/gradle.properties index 59d8fa9d1..c9a9f4de6 100644 --- a/server/gradle.properties +++ b/server/gradle.properties @@ -21,7 +21,7 @@ license=ASL 2.0 hadoopVersion=2.10.2 hiveVersion=2.3.8 hiveStorageApiVersion=2.7.3 -hbaseVersion=2.6.4-hadoop3 +hbaseVersion=2.6.4 junitVersion=4.11 parquetVersion=1.12.3 awsJavaSdk=1.12.261 diff --git a/server/pxf-hbase/build.gradle b/server/pxf-hbase/build.gradle index d82822016..a37947fa2 100644 --- a/server/pxf-hbase/build.gradle +++ b/server/pxf-hbase/build.gradle @@ -14,7 +14,6 @@ dependencies { *******************************/ compileOnly("com.google.code.findbugs:annotations") - compileOnly("org.apache.hbase:hbase-annotations") /******************************* * Project Dependencies @@ -27,14 +26,34 @@ dependencies { * Hbase *******************************/ - implementation("org.apache.hbase:hbase-shaded-client") { transitive = false } - implementation("io.opentelemetry:opentelemetry-api") { transitive = false } - implementation("io.opentelemetry:opentelemetry-context") { transitive = false } - 
implementation("org.apache.yetus:audience-annotations") { transitive = false } - implementation("io.opentelemetry.semconv:opentelemetry-semconv") { transitive = false } - implementation("org.apache.hbase:hbase-annotations") { transitive = false } + implementation("org.apache.hbase:hbase-client") { transitive = false } + implementation("org.apache.hbase.thirdparty:hbase-shaded-protobuf") { transitive = false } + implementation("org.apache.hbase:hbase-common") { transitive = false } + implementation("org.apache.hbase:hbase-logging") { transitive = false } + implementation("org.apache.hbase.thirdparty:hbase-shaded-miscellaneous") { transitive = false } + implementation("org.apache.hbase.thirdparty:hbase-shaded-gson") { transitive = false } + implementation("org.apache.hbase.thirdparty:hbase-shaded-netty") { transitive = false } + implementation("org.apache.hbase.thirdparty:hbase-unsafe") { transitive = false } + implementation("org.apache.commons:commons-lang3") { transitive = false } + implementation("org.apache.commons:commons-crypto") { transitive = false } + implementation("org.apache.hadoop:hadoop-common") { transitive = false } + implementation("org.apache.hadoop:hadoop-auth") { transitive = false } + implementation("org.apache.hbase:hbase-hadoop-compat") { transitive = false } + implementation("org.apache.hbase:hbase-metrics-api") { transitive = false } + implementation("org.apache.hbase:hbase-metrics") { transitive = false } + implementation("org.apache.hbase:hbase-hadoop2-compat") { transitive = false } implementation("org.apache.hbase:hbase-protocol-shaded") { transitive = false } - implementation("org.apache.hbase.thirdparty:hbase-shaded-protobuf") { transitive = false } + implementation("org.apache.hbase:hbase-protocol") { transitive = false } + implementation("com.google.protobuf:protobuf-java") { transitive = false } + implementation("org.apache.zookeeper:zookeeper") { transitive = false } +// skip JRuby - it is part of interactive shell +// 
implementation("org.jruby.jcodings:jcodings:1.0.58") { transitive = false } +// implementation("org.jruby.joni:joni:2.2.1") { transitive = false } + implementation("org.apache.yetus:audience-annotations") { transitive = false } + implementation("io.opentelemetry:opentelemetry-api") { transitive = false } + implementation("io.opentelemetry:opentelemetry-context") { transitive = false } + implementation("io.opentelemetry.semconv:opentelemetry-semconv") { transitive = false } + implementation("io.dropwizard.metrics:metrics-core:3.2.6") { transitive = false } implementation("org.springframework.boot:spring-boot-starter-log4j2") @@ -43,7 +62,6 @@ dependencies { *******************************/ testCompileOnly("com.google.code.findbugs:annotations") - testCompileOnly("org.apache.hbase:hbase-annotations") testImplementation("com.esotericsoftware:minlog") testImplementation("com.esotericsoftware:reflectasm") testImplementation('org.springframework.boot:spring-boot-starter-test') diff --git a/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseDoubleComparator.java b/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseDoubleComparator.java index d5193da43..6dedaf850 100644 --- a/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseDoubleComparator.java +++ b/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseDoubleComparator.java @@ -19,11 +19,11 @@ * under the License. 
*/ -import org.apache.hadoop.hbase.shaded.com.google.protobuf.ByteString; -import org.apache.hadoop.hbase.shaded.com.google.protobuf.InvalidProtocolBufferException; +import org.apache.hbase.thirdparty.com.google.protobuf.ByteString; +import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.filter.ByteArrayComparable; -import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos; +import org.apache.hadoop.hbase.shaded.protobuf.generated.ComparatorProtos; import org.apache.hadoop.hbase.util.Bytes; public class HBaseDoubleComparator extends ByteArrayComparable { diff --git a/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseFloatComparator.java b/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseFloatComparator.java index e0f39bb02..a5291de62 100644 --- a/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseFloatComparator.java +++ b/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseFloatComparator.java @@ -19,11 +19,11 @@ * under the License. 
*/ -import org.apache.hadoop.hbase.shaded.com.google.protobuf.ByteString; -import org.apache.hadoop.hbase.shaded.com.google.protobuf.InvalidProtocolBufferException; +import org.apache.hbase.thirdparty.com.google.protobuf.ByteString; +import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.filter.ByteArrayComparable; -import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos; +import org.apache.hadoop.hbase.shaded.protobuf.generated.ComparatorProtos; import org.apache.hadoop.hbase.util.Bytes; public class HBaseFloatComparator extends ByteArrayComparable{ diff --git a/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseIntegerComparator.java b/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseIntegerComparator.java index 39b094e74..a1e589c52 100644 --- a/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseIntegerComparator.java +++ b/server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/utilities/HBaseIntegerComparator.java @@ -23,11 +23,11 @@ import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.filter.ByteArrayComparable; import org.apache.hadoop.hbase.filter.SubstringComparator; -import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos; +import org.apache.hadoop.hbase.shaded.protobuf.generated.ComparatorProtos; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.shaded.com.google.protobuf.ByteString; -import org.apache.hadoop.hbase.shaded.com.google.protobuf.InvalidProtocolBufferException; +import org.apache.hbase.thirdparty.com.google.protobuf.ByteString; +import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; /** * This is a Filter comparator for HBase It is external to PXF HBase code. 
From c079a00fdd3e367ff373339ee6dcff6144edbdca Mon Sep 17 00:00:00 2001 From: Nikolay Antonov Date: Sun, 8 Feb 2026 00:37:18 +0500 Subject: [PATCH 3/6] downgrade hbase to 2.3.7 - version that works with zookeeper-3.5.x --- ci/singlecluster/Dockerfile | 6 +++--- ci/singlecluster/README.HDP3.md | 2 +- server/build.gradle | 12 ++++++------ server/gradle.properties | 2 +- server/pxf-hbase/build.gradle | 3 ++- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/ci/singlecluster/Dockerfile b/ci/singlecluster/Dockerfile index da8068e85..abb60e3cc 100644 --- a/ci/singlecluster/Dockerfile +++ b/ci/singlecluster/Dockerfile @@ -12,14 +12,14 @@ RUN sudo apt-get update && \ ENV HADOOP_VERSION=3.1.2 ENV HIVE_VERSION=3.1.3 ENV ZOOKEEPER_VERSION=3.5.9 -ENV HBASE_VERSION=2.2.7 +ENV HBASE_VERSION=2.3.7 ENV TEZ_VERSION=0.9.2 # checksums from archive.apache.org ENV HADOOP_SHA512="0e0ee817c89b3c4eb761eca7f16640742a83b0e99b6fda26c1bee2baabedad93aab86e252bf5f1e2381c6d464bc4003d10c7cc0f61b2062f4c59732ca24d1bd9" ENV HIVE_SHA256="0c9b6a6359a7341b6029cc9347435ee7b379f93846f779d710b13f795b54bb16" ENV ZOOKEEPER_SHA512="0e5a64713abc6f36d961dd61a06f681868171a9d9228366e512a01324806d263e05508029c94d8e18307811867cdc39d848e736c252bf56c461273ef74c66a45" -ENV HBASE_SHA512="6ba6d9d2131ada457f63b80f50682230b2589797e4187b2d450e692ab6cd40c68a979dbfb150f5ce48e6d41370f02238b38bebd7f01694c2e8d53937044f526f" +ENV HBASE_SHA512="1032521025660daa70260cdc931f52a26c87596be444451fe1fa88b526ede55e9d6b4220e91ff6f7422bec11f30d64fa6745e95a9c36971fdb1a264a2c745693" ENV TEZ_SHA512="a2d94bd9fa778d42a8bac9d9da8e263e469ddfef93968b06434716554995f490231de5607541ac236e770aa0158b64250c38bc1cd57dbfa629fea705f2ffa2f5" # faster mirror: @@ -63,7 +63,7 @@ RUN mkdir -p $ZOOKEEPER_ROOT && \ RUN mkdir -p $HBASE_ROOT && \ curl -fSL "$HBASE_URL" -o hbase.tar.gz && \ echo "$HBASE_SHA512 hbase.tar.gz" | sha512sum -c && \ - tar xvf hbase.tar.gz -C $HBASE_ROOT --strip-components 1 --exclude="docs/*" --exclude="lib/*-tests.jar" && 
\ + tar xvf hbase.tar.gz -C $HBASE_ROOT --strip-components 1 --exclude="docs/*" --exclude="lib/*-tests.jar" --exclude="lib/shaded-clients" && \ rm hbase.tar.gz RUN mkdir -p $TEZ_ROOT && \ diff --git a/ci/singlecluster/README.HDP3.md b/ci/singlecluster/README.HDP3.md index 14c2b5310..3a906e50a 100644 --- a/ci/singlecluster/README.HDP3.md +++ b/ci/singlecluster/README.HDP3.md @@ -7,7 +7,7 @@ It contains the following versions: - Hadoop 3.3.6 - Hive 3.1.3 - Zookeeper 3.5.9 -- HBase 2.2.7 +- HBase 2.3.7 - Tez 0.9.2 This version of Single cluster requires users to make some manual changes to the configuration files once the tarball has been unpacked (see Initialization steps below). diff --git a/server/build.gradle b/server/build.gradle index e3371460a..410ec1f23 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -85,12 +85,12 @@ configure(javaProjects) { dependency("com.univocity:univocity-parsers:2.9.1") dependency("com.yammer.metrics:metrics-core:2.2.0") dependency("com.zaxxer:HikariCP:3.4.5") - dependency("commons-codec:commons-codec:1.15") + dependency("commons-codec:commons-codec:1.14") dependency("commons-collections:commons-collections:3.2.2") dependency("commons-configuration:commons-configuration:1.10") dependency("commons-io:commons-io:2.7") dependency("commons-lang:commons-lang:2.6") - dependency("commons-lang:commons-lang3:3.18.0") + dependency("commons-lang:commons-lang3:3.9") dependency("commons-logging:commons-logging:1.1.3") dependency("io.airlift:aircompressor:0.27") dependency("javax.jdo:jdo-api:3.0.1") @@ -98,9 +98,9 @@ configure(javaProjects) { dependency("net.sf.opencsv:opencsv:2.3") dependency("org.antlr:antlr-runtime:3.5.2") dependency("org.apache.commons:commons-compress:1.20") - dependency("org.apache.commons:commons-crypto:1.1.0") + dependency("org.apache.commons:commons-crypto:1.0.0") dependency("org.apache.htrace:htrace-core:3.1.0-incubating") - dependency("org.apache.htrace:htrace-core4:4.0.1-incubating") + 
dependency("org.apache.htrace:htrace-core4:4.2.0-incubating") dependency("org.apache.zookeeper:zookeeper:3.4.6") dependency("org.codehaus.woodstox:stax2-api:3.1.4") @@ -151,14 +151,14 @@ configure(javaProjects) { entry("hbase-metrics-api") entry("hbase-metrics") } - dependencySet(group:"org.apache.hbase.thirdparty", version:"4.1.12") { + dependencySet(group:"org.apache.hbase.thirdparty", version:"3.3.0") { entry("hbase-shaded-protobuf") entry("hbase-shaded-miscellaneous") entry("hbase-shaded-gson") entry("hbase-shaded-netty") entry("hbase-unsafe") } - dependency("org.apache.yetus:audience-annotations:0.13.0") + dependency("org.apache.yetus:audience-annotations:0.5.0") dependency("io.opentelemetry:opentelemetry-api:1.49.0") dependency("io.opentelemetry:opentelemetry-context:1.49.0") dependency("io.opentelemetry.semconv:opentelemetry-semconv:1.29.0-alpha") diff --git a/server/gradle.properties b/server/gradle.properties index c9a9f4de6..ea8b0c665 100644 --- a/server/gradle.properties +++ b/server/gradle.properties @@ -21,7 +21,7 @@ license=ASL 2.0 hadoopVersion=2.10.2 hiveVersion=2.3.8 hiveStorageApiVersion=2.7.3 -hbaseVersion=2.6.4 +hbaseVersion=2.3.7 junitVersion=4.11 parquetVersion=1.12.3 awsJavaSdk=1.12.261 diff --git a/server/pxf-hbase/build.gradle b/server/pxf-hbase/build.gradle index a37947fa2..5924b8f22 100644 --- a/server/pxf-hbase/build.gradle +++ b/server/pxf-hbase/build.gradle @@ -33,7 +33,6 @@ dependencies { implementation("org.apache.hbase.thirdparty:hbase-shaded-miscellaneous") { transitive = false } implementation("org.apache.hbase.thirdparty:hbase-shaded-gson") { transitive = false } implementation("org.apache.hbase.thirdparty:hbase-shaded-netty") { transitive = false } - implementation("org.apache.hbase.thirdparty:hbase-unsafe") { transitive = false } implementation("org.apache.commons:commons-lang3") { transitive = false } implementation("org.apache.commons:commons-crypto") { transitive = false } implementation("org.apache.hadoop:hadoop-common") { 
transitive = false } @@ -46,6 +45,8 @@ dependencies { implementation("org.apache.hbase:hbase-protocol") { transitive = false } implementation("com.google.protobuf:protobuf-java") { transitive = false } implementation("org.apache.zookeeper:zookeeper") { transitive = false } + implementation("io.netty:netty-common") { transitive = false } + implementation("io.netty:netty-transport") { transitive = false } // skip JRuby - it is part of interactive shell // implementation("org.jruby.jcodings:jcodings:1.0.58") { transitive = false } // implementation("org.jruby.joni:joni:2.2.1") { transitive = false } From c42addf0e4bd85775b50808f0039e1620c0f77f7 Mon Sep 17 00:00:00 2001 From: Nikolay Antonov Date: Thu, 12 Feb 2026 14:49:55 +0500 Subject: [PATCH 4/6] bump parquet-1.15.2 # Conflicts: # server/build.gradle # server/gradle.properties --- server/build.gradle | 4 ++-- server/gradle.properties | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/server/build.gradle b/server/build.gradle index 410ec1f23..48f336e46 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -92,7 +92,7 @@ configure(javaProjects) { dependency("commons-lang:commons-lang:2.6") dependency("commons-lang:commons-lang3:3.9") dependency("commons-logging:commons-logging:1.1.3") - dependency("io.airlift:aircompressor:0.27") + dependency("io.airlift:aircompressor:2.0.2") dependency("javax.jdo:jdo-api:3.0.1") dependency("joda-time:joda-time:2.8.1") dependency("net.sf.opencsv:opencsv:2.3") @@ -122,7 +122,7 @@ configure(javaProjects) { dependency("org.threeten:threeten-extra:1.5.0") dependency("org.tukaani:xz:1.8") dependency("org.wildfly.openssl:wildfly-openssl:1.0.7.Final") - dependency("org.xerial.snappy:snappy-java:1.1.10.4") + dependency("org.xerial.snappy:snappy-java:1.1.10.7") // Hadoop dependencies dependencySet(group:"org.apache.hadoop", version:"${hadoopVersion}") { diff --git a/server/gradle.properties b/server/gradle.properties index ea8b0c665..e0f416347 100644 --- 
a/server/gradle.properties +++ b/server/gradle.properties @@ -23,7 +23,7 @@ hiveVersion=2.3.8 hiveStorageApiVersion=2.7.3 hbaseVersion=2.3.7 junitVersion=4.11 -parquetVersion=1.12.3 +parquetVersion=1.15.2 awsJavaSdk=1.12.261 springBootVersion=2.7.18 org.gradle.daemon=true From e921f975f82a042a4f7b077303c2faf329285ab6 Mon Sep 17 00:00:00 2001 From: Nikolay Antonov Date: Sat, 14 Feb 2026 23:47:24 +0500 Subject: [PATCH 5/6] Add LZ4_RAW compression support test --- .../pxf/automation/features/parquet/ParquetWriteTest.java | 5 +++++ docs/content/hdfs_parquet.html.md.erb | 2 +- server/build.gradle | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/parquet/ParquetWriteTest.java b/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/parquet/ParquetWriteTest.java index 18218eac1..0bd9b611d 100644 --- a/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/parquet/ParquetWriteTest.java +++ b/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/parquet/ParquetWriteTest.java @@ -210,6 +210,11 @@ public void parquetWritePrimitivesZStd() throws Exception { runWritePrimitivesScenario("pxf_parquet_write_primitives_zstd", "pxf_parquet_read_primitives_zstd", "parquet_write_primitives_zstd", new String[]{"COMPRESSION_CODEC=zstd"}); } + @Test(groups = {"features", "gpdb", "security", "hcfs"}) + public void parquetWritePrimitivesLZ4_RAW() throws Exception { + runWritePrimitivesScenario("pxf_parquet_write_primitives_lz4_raw", "pxf_parquet_read_primitives_lz4_raw", "parquet_write_primitives_lz4_raw", new String[]{"COMPRESSION_CODEC=lz4_raw"}); + } + // Numeric precision not defined, test writing data precision in [1, 38]. All the data should be written correctly. 
@Test(groups = {"features", "gpdb", "security", "hcfs"}) public void parquetWriteUndefinedPrecisionNumeric() throws Exception { diff --git a/docs/content/hdfs_parquet.html.md.erb b/docs/content/hdfs_parquet.html.md.erb index 9ad05b785..75ca69817 100644 --- a/docs/content/hdfs_parquet.html.md.erb +++ b/docs/content/hdfs_parquet.html.md.erb @@ -23,7 +23,7 @@ under the License. Use the PXF HDFS connector to read and write Parquet-format data. This section describes how to read and write HDFS files that are stored in Parquet format, including how to create, query, and insert into external tables that reference files in the HDFS data store. -PXF supports reading or writing Parquet files compressed with these codecs: `snappy`, `gzip`, and `zstd`. +PXF supports reading or writing Parquet files compressed with these codecs: `snappy`, `gzip`, `lz4_raw`, and `zstd`. PXF currently supports reading and writing primitive Parquet data types only. diff --git a/server/build.gradle b/server/build.gradle index 48f336e46..3eb6b087c 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -212,7 +212,7 @@ configure(javaProjects) { entry("avro") entry("avro-mapred") } - // Zstd support for Avro + // Zstd support for Avro/Parquet dependency("com.github.luben:zstd-jni:1.5.7-6") // Jackson 1.x dependencies From 16f188660a1149f0867c6b16e722a4bec1b682f2 Mon Sep 17 00:00:00 2001 From: Nikolay Antonov Date: Sun, 15 Feb 2026 00:11:54 +0500 Subject: [PATCH 6/6] better --- docs/content/hdfs_parquet.html.md.erb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/hdfs_parquet.html.md.erb b/docs/content/hdfs_parquet.html.md.erb index 75ca69817..856d9fbf6 100644 --- a/docs/content/hdfs_parquet.html.md.erb +++ b/docs/content/hdfs_parquet.html.md.erb @@ -182,7 +182,7 @@ The PXF `hdfs:parquet` profile supports encoding- and compression-related write | Write Option | Value Description | |-------|-------------------------------------| -| COMPRESSION_CODEC | The compression 
codec alias. Supported compression codecs for writing Parquet data include: `snappy`, `gzip`, `zstd`, and `uncompressed` . If this option is not provided, PXF compresses the data using `snappy` compression. | +| COMPRESSION_CODEC | The compression codec alias. Supported compression codecs for writing Parquet data include: `snappy`, `gzip`, `lz4_raw`, `zstd`, and `uncompressed` . If this option is not provided, PXF compresses the data using `snappy` compression. | | ROWGROUP_SIZE | A Parquet file consists of one or more row groups, a logical partitioning of the data into rows. `ROWGROUP_SIZE` identifies the size (in bytes) of the row group. The default row group size is `8 * 1024 * 1024` bytes. | | PAGE_SIZE | A row group consists of column chunks that are divided up into pages. `PAGE_SIZE` is the size (in bytes) of such a page. The default page size is `1 * 1024 * 1024` bytes. | | ENABLE\_DICTIONARY | A boolean value that specifies whether or not to enable dictionary encoding. The default value is `true`; dictionary encoding is enabled when PXF writes Parquet files. |