The repository contains a set of utilities for preparing data (regular and parquet files) and generating load on Apache Hadoop.
python files-generator.py \
--nameservice '' \
--parquet \
--hdfs_upload_folder '' \
--folder_name '' \
--sub_folder_count 10 \
--files_count 500 \
--file_size 5 \
--hive_server "" \
--database default \
--local_tmp_folder /tmp

python load-hdfs-testing.py \
--path '' \
--parallel_threads 4 \
--request_count 10 \
--download_folder /tmp

python load-hive-testing.py \
--hive_server '' \
--parallel_threads 4 \
--request_count 10 \
--database '' \
--table '' \
--select

python load-spark-testing.py \
--nameservice '' \
--parallel_threads 4 \
--request_count 10 \
--path '' \
--select