@@ -210,6 +210,11 @@ public void parquetWritePrimitivesZStd() throws Exception {
runWritePrimitivesScenario("pxf_parquet_write_primitives_zstd", "pxf_parquet_read_primitives_zstd", "parquet_write_primitives_zstd", new String[]{"COMPRESSION_CODEC=zstd"});
}

@Test(groups = {"features", "gpdb", "security", "hcfs"})
public void parquetWritePrimitivesLZ4_RAW() throws Exception {
runWritePrimitivesScenario("pxf_parquet_write_primitives_lz4_raw", "pxf_parquet_read_primitives_lz4_raw", "parquet_write_primitives_lz4_raw", new String[]{"COMPRESSION_CODEC=lz4_raw"});
}

// Numeric precision not defined, test writing data precision in [1, 38]. All the data should be written correctly.
@Test(groups = {"features", "gpdb", "security", "hcfs"})
public void parquetWriteUndefinedPrecisionNumeric() throws Exception {
6 changes: 3 additions & 3 deletions ci/singlecluster/Dockerfile
@@ -12,14 +12,14 @@ RUN sudo apt-get update && \
ENV HADOOP_VERSION=3.1.2
ENV HIVE_VERSION=3.1.3
ENV ZOOKEEPER_VERSION=3.5.9
ENV HBASE_VERSION=2.0.6
ENV HBASE_VERSION=2.3.7
ENV TEZ_VERSION=0.9.2

# checksums from archive.apache.org
ENV HADOOP_SHA512="0e0ee817c89b3c4eb761eca7f16640742a83b0e99b6fda26c1bee2baabedad93aab86e252bf5f1e2381c6d464bc4003d10c7cc0f61b2062f4c59732ca24d1bd9"
ENV HIVE_SHA256="0c9b6a6359a7341b6029cc9347435ee7b379f93846f779d710b13f795b54bb16"
ENV ZOOKEEPER_SHA512="0e5a64713abc6f36d961dd61a06f681868171a9d9228366e512a01324806d263e05508029c94d8e18307811867cdc39d848e736c252bf56c461273ef74c66a45"
ENV HBASE_SHA512="a0e10904ecf7f059b77bc0ce704254046a978126db720cc7e55dc53b87097715da64b8391fe3cc94348bc432871ad8f29891dc8df1ea052eb628da0fdca97c93"
ENV HBASE_SHA512="1032521025660daa70260cdc931f52a26c87596be444451fe1fa88b526ede55e9d6b4220e91ff6f7422bec11f30d64fa6745e95a9c36971fdb1a264a2c745693"
ENV TEZ_SHA512="a2d94bd9fa778d42a8bac9d9da8e263e469ddfef93968b06434716554995f490231de5607541ac236e770aa0158b64250c38bc1cd57dbfa629fea705f2ffa2f5"

# faster mirror:
@@ -63,7 +63,7 @@ RUN mkdir -p $ZOOKEEPER_ROOT && \
RUN mkdir -p $HBASE_ROOT && \
curl -fSL "$HBASE_URL" -o hbase.tar.gz && \
echo "$HBASE_SHA512 hbase.tar.gz" | sha512sum -c && \
tar xvf hbase.tar.gz -C $HBASE_ROOT --strip-components 1 --exclude="docs/*" && \
tar xvf hbase.tar.gz -C $HBASE_ROOT --strip-components 1 --exclude="docs/*" --exclude="lib/*-tests.jar" --exclude="lib/shaded-clients" && \
rm hbase.tar.gz

RUN mkdir -p $TEZ_ROOT && \
2 changes: 1 addition & 1 deletion ci/singlecluster/README.HDP3.md
@@ -7,7 +7,7 @@ It contains the following versions:
- Hadoop 3.3.6
- Hive 3.1.3
- Zookeeper 3.5.9
- HBase 2.0.6
- HBase 2.3.7
- Tez 0.9.2

This version of Single cluster requires users to make some manual changes to the configuration files once the tarball has been unpacked (see Initialization steps below).
4 changes: 2 additions & 2 deletions docs/content/hdfs_parquet.html.md.erb
@@ -23,7 +23,7 @@ under the License.

Use the PXF HDFS connector to read and write Parquet-format data. This section describes how to read and write HDFS files that are stored in Parquet format, including how to create, query, and insert into external tables that reference files in the HDFS data store.

PXF supports reading or writing Parquet files compressed with these codecs: `snappy`, `gzip`, and `zstd`.
PXF supports reading or writing Parquet files compressed with these codecs: `snappy`, `gzip`, `lz4_raw`, and `zstd`.

PXF currently supports reading and writing primitive Parquet data types only.

@@ -182,7 +182,7 @@ The PXF `hdfs:parquet` profile supports encoding- and compression-related write

| Write Option | Value Description |
|-------|-------------------------------------|
| COMPRESSION_CODEC | The compression codec alias. Supported compression codecs for writing Parquet data include: `snappy`, `gzip`, `zstd`, and `uncompressed` . If this option is not provided, PXF compresses the data using `snappy` compression. |
| COMPRESSION_CODEC | The compression codec alias. Supported compression codecs for writing Parquet data include: `snappy`, `gzip`, `lz4_raw`, `zstd`, and `uncompressed`. If this option is not provided, PXF compresses the data using `snappy` compression. |
| ROWGROUP_SIZE | A Parquet file consists of one or more row groups, a logical partitioning of the data into rows. `ROWGROUP_SIZE` identifies the size (in bytes) of the row group. The default row group size is `8 * 1024 * 1024` bytes. |
| PAGE_SIZE | A row group consists of column chunks that are divided up into pages. `PAGE_SIZE` is the size (in bytes) of such a page. The default page size is `1 * 1024 * 1024` bytes. |
| ENABLE\_DICTIONARY | A boolean value that specifies whether or not to enable dictionary encoding. The default value is `true`; dictionary encoding is enabled when PXF writes Parquet files. |
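For reference, a minimal sketch, not PXF source, assuming parquet-mr 1.15.x and an illustrative schema and output path, of how the documented write options (including the newly supported `lz4_raw` codec) map onto parquet-mr builder calls:

```java
// Hedged sketch: not PXF code. Assumes parquet-mr 1.15.x on the classpath; the schema,
// output path, and row contents below are illustrative placeholders.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class ParquetWriteOptionsSketch {
    public static void main(String[] args) throws Exception {
        MessageType schema = MessageTypeParser.parseMessageType(
                "message example { required int32 id; required binary name (UTF8); }");

        try (ParquetWriter<Group> writer = ExampleParquetWriter
                .builder(new Path("/tmp/example.parquet"))           // placeholder output path
                .withConf(new Configuration())
                .withType(schema)
                .withCompressionCodec(CompressionCodecName.LZ4_RAW)  // COMPRESSION_CODEC=lz4_raw
                .withRowGroupSize(8L * 1024 * 1024)                   // ROWGROUP_SIZE default
                .withPageSize(1024 * 1024)                            // PAGE_SIZE default
                .withDictionaryEncoding(true)                         // ENABLE_DICTIONARY default
                .build()) {
            Group row = new SimpleGroupFactory(schema).newGroup();
            row.append("id", 1);
            row.append("name", "alice");
            writer.write(row);
        }
    }
}
```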
33 changes: 26 additions & 7 deletions server/build.gradle
@@ -90,15 +90,17 @@ configure(javaProjects) {
dependency("commons-configuration:commons-configuration:1.10")
dependency("commons-io:commons-io:2.7")
dependency("commons-lang:commons-lang:2.6")
dependency("commons-lang:commons-lang3:3.9")
dependency("commons-logging:commons-logging:1.1.3")
dependency("io.airlift:aircompressor:0.27")
dependency("io.airlift:aircompressor:2.0.2")
dependency("javax.jdo:jdo-api:3.0.1")
dependency("joda-time:joda-time:2.8.1")
dependency("net.sf.opencsv:opencsv:2.3")
dependency("org.antlr:antlr-runtime:3.5.2")
dependency("org.apache.commons:commons-compress:1.20")
dependency("org.apache.commons:commons-crypto:1.0.0")
dependency("org.apache.htrace:htrace-core:3.1.0-incubating")
dependency("org.apache.htrace:htrace-core4:4.0.1-incubating")
dependency("org.apache.htrace:htrace-core4:4.2.0-incubating")

dependency("org.apache.zookeeper:zookeeper:3.4.6")
dependency("org.codehaus.woodstox:stax2-api:3.1.4")
@@ -120,7 +122,7 @@ configure(javaProjects) {
dependency("org.threeten:threeten-extra:1.5.0")
dependency("org.tukaani:xz:1.8")
dependency("org.wildfly.openssl:wildfly-openssl:1.0.7.Final")
dependency("org.xerial.snappy:snappy-java:1.1.10.4")
dependency("org.xerial.snappy:snappy-java:1.1.10.7")

// Hadoop dependencies
dependencySet(group:"org.apache.hadoop", version:"${hadoopVersion}") {
@@ -139,11 +141,28 @@ configure(javaProjects) {

// HBase dependencies
dependencySet(group:"org.apache.hbase", version:"${hbaseVersion}") {
entry("hbase-annotations")
entry("hbase-client")
entry("hbase-common")
entry("hbase-protocol")
entry("hbase-protocol-shaded")
entry("hbase-logging")
entry("hbase-hadoop-compat")
entry("hbase-hadoop2-compat")
entry("hbase-metrics-api")
entry("hbase-metrics")
}
dependencySet(group:"org.apache.hbase.thirdparty", version:"3.3.0") {
entry("hbase-shaded-protobuf")
entry("hbase-shaded-miscellaneous")
entry("hbase-shaded-gson")
entry("hbase-shaded-netty")
entry("hbase-unsafe")
}
dependency("org.apache.yetus:audience-annotations:0.5.0")
dependency("io.opentelemetry:opentelemetry-api:1.49.0")
dependency("io.opentelemetry:opentelemetry-context:1.49.0")
dependency("io.opentelemetry.semconv:opentelemetry-semconv:1.29.0-alpha")
dependency("io.dropwizard.metrics:metrics-core:3.2.6")

// Hive dependencies
dependency("org.apache.hive:hive-storage-api:${hiveStorageApiVersion}")
@@ -193,7 +212,7 @@ configure(javaProjects) {
entry("avro")
entry("avro-mapred")
}
// Zstd support for Avro
// Zstd support for Avro/Parquet
dependency("com.github.luben:zstd-jni:1.5.7-6")

// Jackson 1.x dependencies
@@ -237,15 +256,15 @@ configure(javaProjects) {
options.compilerArgs += [
"-g", "-Xlint:varargs", "-Xlint:cast", "-Xlint:classfile", "-Xlint:dep-ann", "-Xlint:divzero",
"-Xlint:empty", "-Xlint:finally", "-Xlint:overrides", "-Xlint:path", "-Xlint:-processing", "-Xlint:static",
"-Xlint:try", "-Xlint:fallthrough", "-Xlint:deprecation", "-Xlint:unchecked", "-Xlint:-options", "-Werror"
"-Xlint:try", "-Xlint:fallthrough", "-Xlint:unchecked", "-Xlint:-options", "-Werror"
]
}

compileTestJava {
options.compilerArgs += [
"-g", "-Xlint:varargs", "-Xlint:cast", "-Xlint:classfile", "-Xlint:dep-ann", "-Xlint:divzero",
"-Xlint:empty", "-Xlint:finally", "-Xlint:overrides", "-Xlint:path", "-Xlint:-processing", "-Xlint:static",
"-Xlint:try", "-Xlint:fallthrough", "-Xlint:deprecation", "-Xlint:unchecked", "-Xlint:-options", "-Werror"
"-Xlint:try", "-Xlint:fallthrough", "-Xlint:unchecked", "-Xlint:-options", "-Werror"
]
}

4 changes: 2 additions & 2 deletions server/gradle.properties
@@ -21,9 +21,9 @@ license=ASL 2.0
hadoopVersion=2.10.2
hiveVersion=2.3.8
hiveStorageApiVersion=2.7.3
hbaseVersion=1.3.2
hbaseVersion=2.3.7
junitVersion=4.11
parquetVersion=1.12.3
parquetVersion=1.15.2
awsJavaSdk=1.12.261
springBootVersion=2.7.18
org.gradle.daemon=true
47 changes: 35 additions & 12 deletions server/pxf-hbase/build.gradle
@@ -14,23 +14,47 @@ dependencies {
*******************************/

compileOnly("com.google.code.findbugs:annotations")
compileOnly("org.apache.hbase:hbase-annotations")

/*******************************
* Implementation Dependencies
* Project Dependencies
*******************************/

implementation(project(':pxf-api'))
implementation("com.google.protobuf:protobuf-java")
implementation("commons-collections:commons-collections")
implementation("org.apache.hbase:hbase-client") { transitive = false }
implementation("org.apache.hbase:hbase-common") { transitive = false }
implementation("org.apache.hbase:hbase-protocol") { transitive = false }
implementation("org.apache.htrace:htrace-core") { transitive = false }
implementation("org.apache.zookeeper:zookeeper") { transitive = false }
implementation("io.netty:netty-common") { transitive = false }
implementation("io.netty:netty-transport") { transitive = false }
implementation("com.yammer.metrics:metrics-core") { transitive = false }

/*******************************
* Hbase
*******************************/

implementation("org.apache.hbase:hbase-client") { transitive = false }
implementation("org.apache.hbase.thirdparty:hbase-shaded-protobuf") { transitive = false }
implementation("org.apache.hbase:hbase-common") { transitive = false }
implementation("org.apache.hbase:hbase-logging") { transitive = false }
implementation("org.apache.hbase.thirdparty:hbase-shaded-miscellaneous") { transitive = false }
implementation("org.apache.hbase.thirdparty:hbase-shaded-gson") { transitive = false }
implementation("org.apache.hbase.thirdparty:hbase-shaded-netty") { transitive = false }
implementation("org.apache.commons:commons-lang3") { transitive = false }
implementation("org.apache.commons:commons-crypto") { transitive = false }
implementation("org.apache.hadoop:hadoop-common") { transitive = false }
implementation("org.apache.hadoop:hadoop-auth") { transitive = false }
implementation("org.apache.hbase:hbase-hadoop-compat") { transitive = false }
implementation("org.apache.hbase:hbase-metrics-api") { transitive = false }
implementation("org.apache.hbase:hbase-metrics") { transitive = false }
implementation("org.apache.hbase:hbase-hadoop2-compat") { transitive = false }
implementation("org.apache.hbase:hbase-protocol-shaded") { transitive = false }
implementation("org.apache.hbase:hbase-protocol") { transitive = false }
implementation("com.google.protobuf:protobuf-java") { transitive = false }
implementation("org.apache.zookeeper:zookeeper") { transitive = false }
implementation("io.netty:netty-common") { transitive = false }
implementation("io.netty:netty-transport") { transitive = false }
// skip JRuby - it is part of interactive shell
// implementation("org.jruby.jcodings:jcodings:1.0.58") { transitive = false }
// implementation("org.jruby.joni:joni:2.2.1") { transitive = false }
implementation("org.apache.yetus:audience-annotations") { transitive = false }
implementation("io.opentelemetry:opentelemetry-api") { transitive = false }
implementation("io.opentelemetry:opentelemetry-context") { transitive = false }
implementation("io.opentelemetry.semconv:opentelemetry-semconv") { transitive = false }
implementation("io.dropwizard.metrics:metrics-core:3.2.6") { transitive = false }

implementation("org.springframework.boot:spring-boot-starter-log4j2")

@@ -39,7 +63,6 @@ dependencies {
*******************************/

testCompileOnly("com.google.code.findbugs:annotations")
testCompileOnly("org.apache.hbase:hbase-annotations")
testImplementation("com.esotericsoftware:minlog")
testImplementation("com.esotericsoftware:reflectasm")
testImplementation('org.springframework.boot:spring-boot-starter-test')
@@ -81,7 +81,6 @@ public FragmentStats getFragmentStats() {
public List<Fragment> getFragments() throws Exception {

// check that Zookeeper and HBase master are available
HBaseAdmin.checkHBaseAvailable(configuration);
connection = ConnectionFactory.createConnection(configuration);
Admin hbaseAdmin = connection.getAdmin();
if (!HBaseUtilities.isTableAvailable(hbaseAdmin, context.getDataSource())) {
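The dropped `HBaseAdmin.checkHBaseAvailable(configuration)` call is deprecated in the HBase 2.x client; the fragmenter now relies on the connection and `Admin` calls themselves failing fast. A minimal sketch of that pattern, with a placeholder table name and a freshly created configuration standing in for the fragmenter's own:

```java
// Hedged sketch of the HBase 2.x availability check; the table name and configuration
// are placeholders, not the values PXF actually uses.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class HBaseAvailabilitySketch {
    public static void main(String[] args) throws Exception {
        Configuration configuration = HBaseConfiguration.create();
        // Creating the connection and asking the Admin about the table already throws
        // if ZooKeeper or the HBase master cannot be reached.
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Admin admin = connection.getAdmin()) {
            TableName table = TableName.valueOf("example_table");   // placeholder
            if (!admin.tableExists(table) || !admin.isTableEnabled(table)) {
                throw new IllegalStateException("HBase table is not available: " + table);
            }
        }
    }
}
```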
@@ -19,11 +19,11 @@
* under the License.
*/

import com.google.protobuf.ByteString;
import com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hbase.thirdparty.com.google.protobuf.ByteString;
import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.filter.ByteArrayComparable;
import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ComparatorProtos;
import org.apache.hadoop.hbase.util.Bytes;

public class HBaseDoubleComparator extends ByteArrayComparable {
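These comparators now go through the relocated protobuf classes that HBase 2.x bundles (`org.apache.hbase.thirdparty`) and the shaded generated `ComparatorProtos`; the same import swap appears in the float and substring comparators below. A hedged sketch, an illustrative class rather than the actual PXF implementation, of the `toByteArray()`/`parseFrom()` pair a custom `ByteArrayComparable` typically provides with those shaded types:

```java
// Hedged sketch: an illustrative comparator, not the PXF HBaseDoubleComparator itself,
// showing serialization through HBase's relocated (shaded) protobuf classes.
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.filter.ByteArrayComparable;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ComparatorProtos;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.thirdparty.com.google.protobuf.ByteString;
import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;

public class ShadedDoubleComparatorSketch extends ByteArrayComparable {

    public ShadedDoubleComparatorSketch(double value) {
        super(Bytes.toBytes(value));
    }

    @Override
    public int compareTo(byte[] value, int offset, int length) {
        // compare the stored double with the cell value, both encoded via Bytes.toBytes(double)
        return Double.compare(Bytes.toDouble(getValue()), Bytes.toDouble(value, offset));
    }

    @Override
    public byte[] toByteArray() {
        // serialize through the shaded ComparatorProtos message, matching the imports above
        return ComparatorProtos.ByteArrayComparable.newBuilder()
                .setValue(ByteString.copyFrom(getValue()))
                .build()
                .toByteArray();
    }

    public static ShadedDoubleComparatorSketch parseFrom(byte[] pbBytes)
            throws DeserializationException {
        try {
            ComparatorProtos.ByteArrayComparable proto =
                    ComparatorProtos.ByteArrayComparable.parseFrom(pbBytes);
            return new ShadedDoubleComparatorSketch(Bytes.toDouble(proto.getValue().toByteArray()));
        } catch (InvalidProtocolBufferException e) {
            throw new DeserializationException(e);
        }
    }
}
```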
@@ -19,11 +19,11 @@
* under the License.
*/

import com.google.protobuf.ByteString;
import com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hbase.thirdparty.com.google.protobuf.ByteString;
import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.filter.ByteArrayComparable;
import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ComparatorProtos;
import org.apache.hadoop.hbase.util.Bytes;

public class HBaseFloatComparator extends ByteArrayComparable{
@@ -23,11 +23,11 @@
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.filter.ByteArrayComparable;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ComparatorProtos;
import org.apache.hadoop.hbase.util.Bytes;

import com.google.protobuf.ByteString;
import com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hbase.thirdparty.com.google.protobuf.ByteString;
import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;

/**
* This is a Filter comparator for HBase It is external to PXF HBase code.