diff --git a/.github/resources/github_workflow.properties b/.github/resources/github_workflow.properties new file mode 100644 index 00000000..aa039e8e --- /dev/null +++ b/.github/resources/github_workflow.properties @@ -0,0 +1,35 @@ +# The script that abides by the multi-language protocol. This script will +# be executed by the MultiLangDaemon, which will communicate with this script +# over STDIN and STDOUT according to the multi-language protocol. +executableName = sample_kclpy_app.py + +# The name of an Amazon Kinesis stream to process. +# Important: streamArn takes precedence over streamName if both are set +streamName = STREAM_NAME_PLACEHOLDER + +# Used by the KCL as the name of this application. Will be used as the name +# of an Amazon DynamoDB table which will store the lease and checkpoint +# information for workers with this application name +applicationName = APP_NAME_PLACEHOLDER + +# Users can change the credentials provider the KCL will use to retrieve credentials. +# Expected key name (case-sensitive): +# AwsCredentialsProvider / AwsCredentialsProviderDynamoDB / AwsCredentialsProviderCloudWatch +# The DefaultCredentialsProvider checks several other providers, which is +# described here: +# https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html +AwsCredentialsProvider = DefaultCredentialsProvider + +# Appended to the user agent of the KCL. Does not impact the functionality of the +# KCL in any other way. +processingLanguage = python/3.8 + +# Valid options at TRIM_HORIZON or LATEST. +# See http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax +initialPositionInStream = TRIM_HORIZON + +# The KCL defaults to us-east-1 +regionName = us-east-1 + +# Idle time between record reads in milliseconds. +idleTimeBetweenReadsInMillis = 250 diff --git a/.github/scripts/clean_up_stream_table.sh b/.github/scripts/clean_up_stream_table.sh index ee8967f6..a5ff3a13 100644 --- a/.github/scripts/clean_up_stream_table.sh +++ b/.github/scripts/clean_up_stream_table.sh @@ -1,19 +1,31 @@ #!/bin/bash -aws kinesis delete-stream --stream-name $STREAM_NAME || true +# Delete stream +if aws kinesis describe-stream --stream-name $STREAM_NAME &>/dev/null; then + echo "Deleting stream $STREAM_NAME" + for i in {1..10}; do + aws kinesis delete-stream --stream-name $STREAM_NAME && break || + echo "Stream deletion failed, attempt $i/10. Retrying Stream deletion in $((i * 3))s" && sleep $((i * 3)) + done +else + echo "Stream $STREAM_NAME does not exist and does not need to be cleaned up" +fi -# Delete all tables -for i in {1..10}; do - echo "Deleting table $APP_NAME" - aws dynamodb delete-table --table-name $APP_NAME && break || - echo "Table deletion failed, attempt $i/10. Retrying DynamoDB Table deletion in $((i * 3)) seconds" && sleep $((i * 3)) -done -for SUFFIX in "-CoordinatorState" "-WorkerMetricStats"; do - if aws dynamodb describe-table --table-name $APP_NAME$SUFFIX &>/dev/null; then - echo "Deleting table $APP_NAME$SUFFIX" +# Delete table +delete_table() { + table_name=$1 + if aws dynamodb describe-table --table-name $table_name &>/dev/null; then + echo "Deleting table $table_name" for i in {1..10}; do - aws dynamodb delete-table --table-name $APP_NAME$SUFFIX && break || + aws dynamodb delete-table --table-name $table_name && break || echo "Table deletion failed, attempt $i/10. Retrying DynamoDB Table deletion in $((i * 3))s" && sleep $((i * 3)) done + else + echo "Table $table_name does not exist and does not need to be cleaned up" fi +} + +# Delete all tables +for SUFFIX in "" "-CoordinatorState" "-WorkerMetricStats"; do + delete_table "$APP_NAME$SUFFIX" done \ No newline at end of file diff --git a/.github/scripts/manipulate_properties.sh b/.github/scripts/manipulate_properties.sh index 92b07f44..1b50fab2 100644 --- a/.github/scripts/manipulate_properties.sh +++ b/.github/scripts/manipulate_properties.sh @@ -1,29 +1,23 @@ #!/bin/bash set -e -# Manipulate sample.properties file that the KCL application pulls properties from (ex: streamName, applicationName) +# Manipulate github_workflow.properties file that the KCL application pulls properties from (ex: streamName, applicationName) # Depending on the OS, different properties need to be changed if [[ "$RUNNER_OS" == "macOS" ]]; then - sed -i "" "s/kclpysample/$STREAM_NAME/g" samples/sample.properties - sed -i "" "s/PythonKCLSample/$APP_NAME/g" samples/sample.properties - sed -i "" 's/us-east-5/us-east-1/g' samples/sample.properties - grep -v "idleTimeBetweenReadsInMillis" samples/sample.properties > samples/temp.properties - echo "idleTimeBetweenReadsInMillis = 250" >> samples/temp.properties - mv samples/temp.properties samples/sample.properties + sed -i "" "s/STREAM_NAME_PLACEHOLDER/$STREAM_NAME/g" .github/resources/github_workflow.properties + sed -i "" "s/APP_NAME_PLACEHOLDER/$APP_NAME/g" .github/resources/github_workflow.properties elif [[ "$RUNNER_OS" == "Linux" || "$RUNNER_OS" == "Windows" ]]; then - sed -i "s/kclpysample/$STREAM_NAME/g" samples/sample.properties - sed -i "s/PythonKCLSample/$APP_NAME/g" samples/sample.properties - sed -i 's/us-east-5/us-east-1/g' samples/sample.properties - sed -i "/idleTimeBetweenReadsInMillis/c\idleTimeBetweenReadsInMillis = 250" samples/sample.properties + sed -i "s/STREAM_NAME_PLACEHOLDER/$STREAM_NAME/g" .github/resources/github_workflow.properties + sed -i "s/APP_NAME_PLACEHOLDER/$APP_NAME/g" .github/resources/github_workflow.properties if [[ "$RUNNER_OS" == "Windows" ]]; then echo '@echo off' > samples/run_script.bat echo 'python %~dp0\sample_kclpy_app.py %*' >> samples/run_script.bat - sed -i 's/executableName = sample_kclpy_app.py/executableName = samples\/run_script.bat/' samples/sample.properties + sed -i 's/executableName = sample_kclpy_app.py/executableName = samples\/run_script.bat/' .github/resources/github_workflow.properties fi else echo "Unknown OS: $RUNNER_OS" exit 1 fi -cat samples/sample.properties \ No newline at end of file +cat .github/resources/github_workflow.properties \ No newline at end of file diff --git a/.github/scripts/start_kcl.sh b/.github/scripts/start_kcl.sh index e8f8c273..2082c70d 100644 --- a/.github/scripts/start_kcl.sh +++ b/.github/scripts/start_kcl.sh @@ -2,22 +2,15 @@ set -e set -o pipefail -chmod +x samples/sample.properties +chmod +x .github/resources/github_workflow.properties chmod +x samples/sample_kclpy_app.py -# Get records from stream to verify they exist before continuing -SHARD_ITERATOR=$(aws kinesis get-shard-iterator --stream-name $STREAM_NAME --shard-id shardId-000000000000 --shard-iterator-type TRIM_HORIZON --query 'ShardIterator' --output text) -INITIAL_RECORDS=$(aws kinesis get-records --shard-iterator $SHARD_ITERATOR) -RECORD_COUNT_BEFORE=$(echo $INITIAL_RECORDS | jq '.Records | length') - -echo "Found $RECORD_COUNT_BEFORE records in stream before KCL start" - if [[ "$RUNNER_OS" == "macOS" ]]; then brew install coreutils - KCL_COMMAND=$(amazon_kclpy_helper.py --print_command --java $(which java) --properties samples/sample.properties) + KCL_COMMAND=$(amazon_kclpy_helper.py --print_command --java $(which java) --properties .github/resources/github_workflow.properties) gtimeout $RUN_TIME_SECONDS $KCL_COMMAND 2>&1 | tee kcl_output.log || [ $? -eq 124 ] elif [[ "$RUNNER_OS" == "Linux" || "$RUNNER_OS" == "Windows" ]]; then - KCL_COMMAND=$(amazon_kclpy_helper.py --print_command --java $(which java) --properties samples/sample.properties) + KCL_COMMAND=$(amazon_kclpy_helper.py --print_command --java $(which java) --properties .github/resources/github_workflow.properties) timeout $RUN_TIME_SECONDS $KCL_COMMAND 2>&1 | tee kcl_output.log || [ $? -eq 124 ] else echo "Unknown OS: $RUNNER_OS" diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 0d05ac08..43f93e63 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -112,8 +112,8 @@ jobs: echo "STREAM_NAME=$STREAM_NAME" >> $GITHUB_ENV echo "APP_NAME=$APP_NAME" >> $GITHUB_ENV - # Manipulate sample.properties file to use unique stream name, application name, and OS specific program changes - - name: Manipulate sample.properties file + # Manipulate github_workflow.properties file to use unique stream name, application name, and OS specific program changes + - name: Manipulate github_workflow.properties file run: | chmod +x .github/scripts/manipulate_properties.sh .github/scripts/manipulate_properties.sh