diff --git a/.github/workflows/README.md b/.github/workflows/README.md new file mode 100644 index 00000000..064f1e5e --- /dev/null +++ b/.github/workflows/README.md @@ -0,0 +1,85 @@ +# GitHub Actions Workflows +These workflows define a CI/CD pipeline that is intended to work both for the +main repo and for forks. + +GitHub Actions workflows overview: +- Run unit tests and style checks against PRs +- Run end-to-end tests whenever a PR is merged +- Build and publish zip/yaml files needed to spin up a TEA stack on releases + +In order for the end-to-end tests to run, you need to set the `RUN_TESTS` secret +in your repository secrets to `true`. However, currently these tests assume +they are running in ASF's environment, so enabling them is probably not desired +on forked repos; hence they are disabled by default. All other secrets are +stored in GitHub environments. + +## Environments +There are two environments used to control which AWS resources will be modified +by GitHub Actions. The `prod` environment is used for pushing finished build +artifacts and build status files to a public bucket. The `test` environment is +used for deploying a test stack and running the end-to-end tests against it. +Unless you enable the end-to-end tests with the `RUN_TESTS` repository secret, +you don't need to configure any secrets in the `test` environment. + +- prod + - AWS Credentials (see below) +- test + - AWS Credentials (see below) + - `URS_USERNAME` URS username used by end-to-end tests for authenticated + downloads + - `URS_PASSWORD` URS password used by end-to-end tests for authenticated + downloads + - If the config file doesn't specify a value for `URS_AUTH_CREDS_SECRET_NAME`, + the following secrets are also needed: + - `URS_CLIENT_ID` + - `EDL_APP_UID` + - `EDL_APP_PASSWORD` + +### Setting up AWS Credentials +- `AWS_ACCESS_KEY_ID` +- `AWS_SECRET_ACCESS_KEY` +- (optional) `AWS_ROLE_ARN` +- (optional) `AWS_REGION` + +## Config file +Unfortunately, GitHub currently doesn't support non-secret configuration +variables. Therefore, all non-secret configuration is stored in `.env` +files located in the `config-public` directory. The file names correspond to +the environment names that the configuration belongs to. After forking the repo, +you can commit changes to these files to adjust the setup for your use case. + +### Build Configuration +The build configuration is located in `prod.env` and includes the following +options: + +- `CODE_BUCKET` Bucket to upload build results to. Needs to allow public ACL. +- (optional) `CODE_PREFIX` All objects uploaded to the code bucket get + this prefix. + +### Test Configuration +The end-to-end test configuration is located in `test.env` and includes the +following options: + +- `BUCKET_MAP_FILE` Name of the bucket map file to use from the config +bucket. +- `BUCKETNAME_PREFIX` +- `CODE_BUCKET` Bucket to upload build results to for testing. Probably a + private bucket. +- `CODE_PREFIX` All objects uploaded to the code bucket get this prefix. +- `CONFIG_BUCKET` Bucket containing configuration files such as + the bucket map. Defaults to `CODE_BUCKET`. +- `COOKIE_DOMAIN` +- `DOMAIN_CERT_ARN` +- `DOMAIN_NAME` +- `JWT_KEY_SECRET_NAME` Name of the AWS SecretsManager secret + containing the JWT public and private keys. This can be omitted, and a + secret will be created automatically with a newly generated key pair. 
+- `STACK_NAME` Name of the CloudFormation stack to update +- `URS_AUTH_CREDS_SECRET_NAME` Name of the AWS SecretsManager + secret containing URS client id and client secret. This can be omitted + and a secret will be created automatically using the following github + environment secrets: + - `URS_CLIENT_ID` + - `EDL_APP_UID` + - `EDL_APP_PASSWORD` +- `URS_URL` URL to use for Earthdata login. diff --git a/.github/workflows/config-public/prod.env b/.github/workflows/config-public/prod.env new file mode 100644 index 00000000..1e60f426 --- /dev/null +++ b/.github/workflows/config-public/prod.env @@ -0,0 +1,2 @@ +CODE_BUCKET=bbarton-dev +CODE_PREFIX=thin-egress-app-prodbuild/ diff --git a/.github/workflows/config-public/test.env b/.github/workflows/config-public/test.env new file mode 100644 index 00000000..766cd5f1 --- /dev/null +++ b/.github/workflows/config-public/test.env @@ -0,0 +1,13 @@ +BUCKET_MAP_FILE=bucket_map_customheaders.yaml +BUCKETNAME_PREFIX=rain-uw2-t- +CODE_BUCKET=bbarton-dev +CODE_PREFIX=thin-egress-app-benbart/ +CONFIG_BUCKET=rain-uw2-t-config +COOKIE_DOMAIN=.asf.alaska.edu +DOMAIN_CERT_ARN=arn:aws:acm:us-east-1:117169578524:certificate/1f3945cf-cac7-4d75-ad8d-b8920534fea2 +DOMAIN_NAME=tea-test-jenk-0.asf.alaska.edu +JWT_KEY_SECRET_NAME= +STACK_NAME=teatest-jenk-same +URS_AUTH_CREDS_SECRET_NAME=URS_creds_ASF_DATA_ACCESS_EGRESS_CONTROL +URS_URL=https://urs.earthdata.nasa.gov +FOO=BAR diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..59b1db0e --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,18 @@ +name: Lint +on: + pull_request: + +jobs: + flake8: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.8 + + - run: pip install -r requirements.txt + + - uses: TrueBrain/actions-flake8@v2 + with: + plugins: flake8-isort diff --git a/.github/workflows/re-build.yml b/.github/workflows/re-build.yml new file mode 100644 index 00000000..fceb914a --- /dev/null +++ b/.github/workflows/re-build.yml @@ -0,0 +1,96 @@ +# Reusable workflow for building artifacts +name: Build + +on: + workflow_call: + inputs: + environment: + required: true + type: string + + outputs: + version: + value: ${{ jobs.variables.outputs.version }} + code-zip: + value: ${{ jobs.variables.outputs.code-zip }} + dependency-zip: + value: ${{ jobs.variables.outputs.dependency-zip }} + cloudformation-yaml: + value: ${{ jobs.variables.outputs.cloudformation-yaml }} + terraform-zip: + value: ${{ jobs.variables.outputs.terraform-zip }} + + +jobs: + # Generate some names from the version info in the tag name + variables: + runs-on: ubuntu-latest + outputs: + version: ${{ steps.step1.outputs.version }} + code-zip: ${{ steps.step1.outputs.code-zip }} + dependency-zip: ${{ steps.step1.outputs.dependency-zip }} + cloudformation-yaml: ${{ steps.step1.outputs.cloudformation-yaml }} + terraform-zip: ${{ steps.step1.outputs.terraform-zip }} + + steps: + - id: step1 + run: | + VERSION=${GITHUB_REF_NAME#*.} + echo "::set-output name=version::$VERSION" + echo "::set-output name=code-zip::tea-code-build.$VERSION.zip" + echo "::set-output name=dependency-zip::tea-dependencylayer-build.$VERSION.zip" + echo "::set-output name=cloudformation-yaml::tea-cloudformation-build.$VERSION.yaml" + echo "::set-output name=terraform-zip::tea-terraform-build.$VERSION.zip" + + # Build everything + build-all: + runs-on: ubuntu-latest + environment: ${{ inputs.environment }} + needs: variables + + steps: + - uses: 
actions/checkout@v2 + + - name: Load environment defaults + run: cat .github/workflows/config-public/${{ inputs.environment }}.env >> $GITHUB_ENV + + - name: Set Makefile.config + run: | + TIMESTAMP=$(TZ=America/Anchorage date) + + cat > Makefile.config << EOF + BUILD_ID := ${{ needs.variables.outputs.version }} + + CF_DEFAULT_CODE_BUCKET := ${{ env.CODE_BUCKET }} + CF_DEFAULT_DEPENDENCY_ARCHIVE_KEY := ${{ env.CODE_PREFIX }}${{ needs.variables.outputs.dependency-zip }} + CF_DEFAULT_CODE_ARCHIVE_KEY := ${{ env.CODE_PREFIX }}${{ needs.variables.outputs.code-zip }} + CF_BUILD_VERSION := \$(BUILD_ID) + CF_DESCRIPTION := TEA version ${{ needs.variables.outputs.version }} (${GITHUB_SHA:0:7}) built by GitHub Actions on $TIMESTAMP. + EOF + + - name: Build artifacts + run: make build + + - name: Save dependency layer zip + uses: actions/upload-artifact@v2 + with: + name: dependency-layer + path: dist/thin-egress-app-dependencies.zip + + - name: Save Lambda code + uses: actions/upload-artifact@v2 + with: + name: code + path: dist/thin-egress-app-code.zip + + - name: Save CloudFormation yaml + uses: actions/upload-artifact@v2 + with: + name: cloudformation + path: dist/thin-egress-app.yaml + + - name: Save Terraform zip + uses: actions/upload-artifact@v2 + with: + name: terraform + path: dist/thin-egress-app-terraform.zip diff --git a/.github/workflows/re-status.yml b/.github/workflows/re-status.yml new file mode 100644 index 00000000..0d64dff0 --- /dev/null +++ b/.github/workflows/re-status.yml @@ -0,0 +1,91 @@ +# Reusable workflow for reporting build status +name: Status + +on: + workflow_call: + inputs: + environment: + required: true + type: string + build_tag: + required: true + type: string + success: + required: true + type: boolean + + secrets: + AWS_ACCESS_KEY_ID: + required: true + AWS_SECRET_ACCESS_KEY: + required: true + # Optional + AWS_ROLE_ARN: + required: false + AWS_REGION: + required: false + + +jobs: + report-success: + if: inputs.success + runs-on: ubuntu-latest + environment: ${{ inputs.environment }} + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_ROLE_ARN: ${{ secrets.AWS_ROLE_ARN }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }} + + steps: + - uses: actions/checkout@v2 + + - name: Load environment defaults + run: cat .github/workflows/config-public/${{ inputs.environment }}.env >> $GITHUB_ENV + + - uses: actions/download-artifact@v2 + + - name: Upload success labels + run: | + mkdir -p buildreport + echo '{"schemaVersion": 1, "label": "Build Status", "message": "Success", "color": "success"}' > buildreport/buildstatus.json + echo '{"schemaVersion": 1, "label": "Last Successful Build", "message": "${{ inputs.build_tag }}", "color": "success"}' > buildreport/lastgoodbuild.json + echo '{"schemaVersion": 1, "label": "Last Build", "message": "${{ inputs.build_tag }}", "color": "success"}' > buildreport/lastbuild.json + aws s3 cp buildreport/ "s3://${CODE_BUCKET}/thin-egress-app/" \ + --recursive \ + --metadata-directive REPLACE \ + --cache-control no-cache \ + --expires '2016-06-14T00:00:00Z' \ + --content-type 'application/json' \ + --acl public-read + + report-failures: + if: ${{ !inputs.success }} + runs-on: ubuntu-latest + environment: ${{ inputs.environment }} + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_ROLE_ARN: ${{ secrets.AWS_ROLE_ARN }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }} + + steps: + - uses: 
actions/checkout@v2 + + - name: Load environment defaults + run: cat .github/workflows/config-public/${{ inputs.environment }}.env >> $GITHUB_ENV + + - uses: actions/download-artifact@v2 + + - name: Upload failure labels + run: | + mkdir -p buildreport + echo '{"schemaVersion": 1, "label": "Build Status", "message": "Failed!", "color": "critical"}' > buildreport/buildstatus.json + echo '{"schemaVersion": 1, "label": "Last Build", "message": "${{ inputs.build_tag }}", "color": "critical"}' > buildreport/lastbuild.json + aws s3 cp buildreport/ "s3://${CODE_BUCKET}/thin-egress-app/" \ + --recursive \ + --metadata-directive REPLACE \ + --cache-control no-cache \ + --expires '2016-06-14T00:00:00Z' \ + --content-type 'application/json' \ + --acl public-read diff --git a/.github/workflows/re-test-e2e.yml b/.github/workflows/re-test-e2e.yml new file mode 100644 index 00000000..038711ed --- /dev/null +++ b/.github/workflows/re-test-e2e.yml @@ -0,0 +1,194 @@ +# Reusable workflow for running end-to-end tests +name: End-to-End Test + +on: + workflow_call: + inputs: + environment: + required: true + type: string + dependency-zip: + required: true + type: string + code-zip: + required: true + type: string + cloudformation-yaml: + required: true + type: string + + secrets: + AWS_ACCESS_KEY_ID: + required: true + AWS_SECRET_ACCESS_KEY: + required: true + URS_USERNAME: + required: true + URS_PASSWORD: + required: true + # Optional + AWS_ROLE_ARN: + required: false + AWS_REGION: + required: false + URS_CLIENT_ID: + required: false + EDL_APP_UID: + required: false + EDL_APP_PASSWORD: + required: false + +jobs: + # Deploy to the test environment and run end to end tests + test-end-to-end: + runs-on: ubuntu-latest + environment: ${{ inputs.environment }} + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_ROLE_ARN: ${{ secrets.AWS_ROLE_ARN }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }} + + steps: + - uses: actions/checkout@v2 + + - name: Load environment defaults + run: cat .github/workflows/config-public/${{ inputs.environment }}.env >> $GITHUB_ENV + + - uses: actions/download-artifact@v2 + + - name: Upload to AWS + env: + S3_PATH_PREFIX: s3://${{ env.CODE_BUCKET }}/${{ env.CODE_PREFIX }} + run: | + aws s3 cp ./dependency-layer/thin-egress-app-dependencies.zip ${S3_PATH_PREFIX}${{ inputs.dependency-zip }} + aws s3 cp ./code/thin-egress-app-code.zip ${S3_PATH_PREFIX}${{ inputs.code-zip }} + aws s3 cp ./cloudformation/thin-egress-app.yaml ${S3_PATH_PREFIX}${{ inputs.cloudformation-yaml }} + + - name: Update JWT secret + if: env.JWT_KEY_SECRET_NAME == '' + run: | + JWT_KEY_SECRET_NAME="$STACK_NAME-jwt-key" + echo "JWT_KEY_SECRET_NAME=$JWT_KEY_SECRET_NAME" >> $GITHUB_ENV + + # Check if the secret exists + secret_arn=$(aws secretsmanager describe-secret \ + --secret-id "$JWT_KEY_SECRET_NAME" \ + --query "ARN" --output=text 2>&1 \ + | grep -v ResourceNotFoundException) + + if [ -z "$secret_arn" ]; then + echo "Creating JWT secret '$JWT_KEY_SECRET_NAME'" + + ssh-keygen -t rsa -b 4096 -m PEM -N '' -f "$JWT_KEY_SECRET_NAME" + rsa_priv_key=$(openssl base64 -in "$JWT_KEY_SECRET_NAME" -A) + rsa_pub_key=$(openssl base64 -in "$JWT_KEY_SECRET_NAME.pub" -A) + cat << EOF > keys.txt + { + "rsa_priv_key": "$rsa_priv_key", + "rsa_pub_key": "$rsa_pub_key" + } + EOF + + aws secretsmanager create-secret \ + --name "$JWT_KEY_SECRET_NAME" \ + --description "JWT keys for TEA $STACK_NAME created by GitHub Actions" \ + --secret-string file://keys.txt + + 
shred keys.txt + else + echo "JWT secret '$JWT_KEY_SECRET_NAME' already exists" + fi + + - name: Update URS secret + if: env.URS_AUTH_CREDS_SECRET_NAME == '' + run: | + # Use a secret name distinct from the JWT key secret created above + URS_AUTH_CREDS_SECRET_NAME="$STACK_NAME-urs-creds" + echo "URS_AUTH_CREDS_SECRET_NAME=$URS_AUTH_CREDS_SECRET_NAME" >> $GITHUB_ENV + + # Check if the secret exists + secret_arn=$(aws secretsmanager describe-secret \ + --secret-id "$URS_AUTH_CREDS_SECRET_NAME" \ + --query "ARN" --output=text 2>&1 \ + | grep -v ResourceNotFoundException) + + urs_auth=$(echo -n "${{ secrets.EDL_APP_UID }}:${{ secrets.EDL_APP_PASSWORD }}" | base64) + cat << EOF > secret.txt + { + "UrsAuth": "$urs_auth", + "UrsId": "${{ secrets.URS_CLIENT_ID }}" + } + EOF + + if [ -z "$secret_arn" ]; then + echo ">> Creating URS secret '$URS_AUTH_CREDS_SECRET_NAME'" + + aws secretsmanager create-secret \ + --name "$URS_AUTH_CREDS_SECRET_NAME" \ + --description "URS creds for TEA $STACK_NAME created by GitHub Actions" \ + --secret-string file://secret.txt + + else + echo ">> Updating URS secret '$URS_AUTH_CREDS_SECRET_NAME'" + + aws secretsmanager put-secret-value \ + --secret-id "$URS_AUTH_CREDS_SECRET_NAME" \ + --secret-string file://secret.txt + fi + + shred secret.txt + + - name: Update CloudFormation stack description + run: | + cp ./cloudformation/thin-egress-app.yaml ./cloudformation/thin-egress-app-deployed.yaml + + sed -i -E "s;^(Description:.*)\";\1 Job: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID\";" ./cloudformation/thin-egress-app-deployed.yaml + + - name: Deploy CloudFormation stack + run: | + aws cloudformation deploy \ + --stack-name "$STACK_NAME" \ + --template-file "./cloudformation/thin-egress-app-deployed.yaml" \ + --capabilities "CAPABILITY_NAMED_IAM" \ + --parameter-overrides \ + AuthBaseUrl="$URS_URL" \ + BucketMapFile="$BUCKET_MAP_FILE" \ + BucketnamePrefix="$BUCKETNAME_PREFIX" \ + ConfigBucket="$CONFIG_BUCKET" \ + CookieDomain="$COOKIE_DOMAIN" \ + DomainCertArn="$DOMAIN_CERT_ARN" \ + DomainName="$DOMAIN_NAME" \ + EnableApiGatewayLogToCloudWatch="False" \ + JwtAlgo="RS256" \ + JwtKeySecretName="$JWT_KEY_SECRET_NAME" \ + LambdaCodeDependencyArchive="${CODE_PREFIX}${{ inputs.dependency-zip }}" \ + LambdaCodeS3Bucket="$CODE_BUCKET" \ + LambdaCodeS3Key="${CODE_PREFIX}${{ inputs.code-zip }}" \ + LambdaMemory="128" \ + Loglevel="DEBUG" \ + Logtype="json" \ + Maturity="TEST" \ + StageName="API" \ + URSAuthCredsSecretName="$URS_AUTH_CREDS_SECRET_NAME" \ + UseCorsCookieDomain="True" \ + UseReverseBucketMap="False" \ + DownloadRoleArn= \ + DownloadRoleInRegionArn= \ + HtmlTemplateDir= \ + PermissionsBoundaryName= \ + PrivateBucketsFile= \ + PrivateVPC= \ + PublicBucketsFile= \ + VPCSecurityGroupIDs= \ + VPCSubnetIDs= + + - name: Run end-to-end tests + env: + URS_USERNAME: ${{ secrets.URS_USERNAME }} + URS_PASSWORD: ${{ secrets.URS_PASSWORD }} + run: | + pip install -r requirements-dev.txt + pytest tests_e2e \ + --stack-name=$STACK_NAME \ + --test-results=asf.public.code/thin-egress-app/testresults.json \ + --log-cli-level=DEBUG diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..3049c9c9 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,147 @@ +# This workflow runs whenever a GitHub release is created. It gets the version +# number from the git tag, builds and tests TEA, and then uploads the artifacts +# to the public bucket for distribution. 
+name: Release + +on: + release: + types: + - released + +jobs: + build: + uses: ./.github/workflows/re-build.yml + with: + environment: prod + + # The secrets context is not supported on job level `if`. So we need to hack around it: + # https://github.com/actions/runner/issues/520#issuecomment-907427748 + check-tests: + runs-on: ubuntu-latest + outputs: + run-tests: ${{ steps.eval.outputs.val }} + steps: + - id: eval + env: + RUN_TESTS: ${{ secrets.RUN_TESTS }} + if: "${{ env.RUN_TESTS == 'true' }}" + run: echo "::set-output name=val::yes" + + test-e2e: + needs: + - build + - check-tests + if: needs.check-tests.outputs.run-tests == 'yes' + uses: ./.github/workflows/re-test-e2e.yml + with: + environment: test + dependency-zip: ${{ needs.build.outputs.dependency-zip }} + code-zip: ${{ needs.build.outputs.code-zip }} + cloudformation-yaml: ${{ needs.build.outputs.cloudformation-yaml }} + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_ROLE_ARN: ${{ secrets.AWS_ROLE_ARN }} + AWS_REGION: ${{ secrets.AWS_REGION }} + URS_USERNAME: ${{ secrets.URS_USERNAME }} + URS_PASSWORD: ${{ secrets.URS_PASSWORD }} + URS_CLIENT_ID: ${{ secrets.URS_CLIENT_ID }} + EDL_APP_UID: ${{ secrets.EDL_APP_UID }} + EDL_APP_PASSWORD: ${{ secrets.EDL_APP_PASSWORD }} + + publish: + runs-on: ubuntu-latest + environment: prod + needs: + - build + - test-e2e + if: success() || needs.test-e2e.result == 'skipped' + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_ROLE_ARN: ${{ secrets.AWS_ROLE_ARN }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }} + + steps: + - uses: actions/checkout@v2 + + - name: Load environment defaults + run: cat .github/workflows/config-public/prod.env >> $GITHUB_ENV + + - uses: actions/download-artifact@v2 + + - name: Upload to public code bucket + env: + S3_PATH_PREFIX: s3://${{ env.CODE_BUCKET }}/${{ env.CODE_PREFIX }} + run: | + aws s3 cp ./dependency-layer/thin-egress-app-dependencies.zip ${S3_PATH_PREFIX}${{ needs.build.outputs.dependency-zip }} + aws s3 cp ./code/thin-egress-app-code.zip ${S3_PATH_PREFIX}${{ needs.build.outputs.code-zip }} + aws s3 cp ./cloudformation/thin-egress-app.yaml ${S3_PATH_PREFIX}${{ needs.build.outputs.cloudformation-yaml }} + aws s3 cp ./terraform/thin-egress-app-terraform.zip ${S3_PATH_PREFIX}${{ needs.build.outputs.terraform-zip }} + + echo '{"schemaVersion": 1, "label": "Last Release", "message": "'$GITHUB_REF_NAME'", "color": "success"}' > lastrelease.json + aws s3 cp lastrelease.json s3://${CODE_BUCKET}/thin-egress-app/ \ + --metadata-directive REPLACE \ + --cache-control no-cache \ + --expires '2016-06-14T00:00:00Z' \ + --content-type 'application/json' \ + --acl public-read + + + release-assets: + runs-on: ubuntu-latest + needs: + - build + - test-e2e + if: success() || needs.test-e2e.result == 'skipped' + steps: + - uses: actions/download-artifact@v2 + + - uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ github.event.release.upload_url }} + asset_path: ./dependency-layer/thin-egress-app-dependencies.zip + asset_name: ${{ needs.build.outputs.dependency-zip }} + asset_content_type: application/zip + - uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ github.event.release.upload_url }} + asset_path: ./code/thin-egress-app-code.zip + asset_name: ${{ needs.build.outputs.code-zip }} 
+ asset_content_type: application/zip + - uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ github.event.release.upload_url }} + asset_path: ./cloudformation/thin-egress-app.yaml + asset_name: ${{ needs.build.outputs.cloudformation-yaml }} + asset_content_type: application/vnd.yaml + - uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ github.event.release.upload_url }} + asset_path: ./terraform/thin-egress-app-terraform.zip + asset_name: ${{ needs.build.outputs.terraform-zip }} + asset_content_type: application/zip + + status: + if: always() + needs: + - build + - publish + uses: ./.github/workflows/re-status.yml + with: + environment: prod + build_tag: ${{ needs.build.outputs.version }} + success: ${{ needs.build.result != 'failure' && needs.publish.result != 'failure' }} + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_ROLE_ARN: ${{ secrets.AWS_ROLE_ARN }} + AWS_REGION: ${{ secrets.AWS_REGION }} diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml new file mode 100644 index 00000000..9bcbf480 --- /dev/null +++ b/.github/workflows/test-e2e.yml @@ -0,0 +1,67 @@ +name: End-to-End Test + +on: + push: + branches: + - devel + - master + +jobs: + build: + uses: ./.github/workflows/re-build.yml + with: + environment: prod + + # The secrets context is not supported on job level `if`. So we need to hack around it: + # https://github.com/actions/runner/issues/520#issuecomment-907427748 + check-tests: + runs-on: ubuntu-latest + outputs: + run-tests: ${{ steps.eval.outputs.val }} + steps: + - id: eval + env: + RUN_TESTS: ${{ secrets.RUN_TESTS }} + if: "${{ env.RUN_TESTS == 'true' }}" + run: echo "::set-output name=val::yes" + + test-e2e: + needs: + - build + - check-tests + if: needs.check-tests.outputs.run-tests == 'yes' + uses: ./.github/workflows/re-test-e2e.yml + with: + environment: test + dependency-zip: ${{ needs.build.outputs.dependency-zip }} + code-zip: ${{ needs.build.outputs.code-zip }} + cloudformation-yaml: ${{ needs.build.outputs.cloudformation-yaml }} + # Reusable workflows + Environments behave very strangely + # https://github.com/AllanOricil/workflow-template-bug/blob/fc8ae4264938adb560fa6928cb19c69d110d8bbd/.github/workflows/workflow-inplementation.yml#L46 + # Yea, seriously hope this gets fixed!!!!! 
+ secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_ROLE_ARN: ${{ secrets.AWS_ROLE_ARN }} + AWS_REGION: ${{ secrets.AWS_REGION }} + URS_USERNAME: ${{ secrets.URS_USERNAME }} + URS_PASSWORD: ${{ secrets.URS_PASSWORD }} + URS_CLIENT_ID: ${{ secrets.URS_CLIENT_ID }} + EDL_APP_UID: ${{ secrets.EDL_APP_UID }} + EDL_APP_PASSWORD: ${{ secrets.EDL_APP_PASSWORD }} + + status: + if: always() + needs: + - build + - test-e2e + uses: ./.github/workflows/re-status.yml + with: + environment: prod + build_tag: ${{ needs.build.outputs.version }} + success: ${{ needs.build.result != 'failure' && needs.test-e2e.result != 'failure' }} + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_ROLE_ARN: ${{ secrets.AWS_ROLE_ARN }} + AWS_REGION: ${{ secrets.AWS_REGION }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..1cd1a3ef --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,31 @@ +name: Unit Test + +on: + pull_request: + push: + branches: + - devel + - master + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v1 + with: + python-version: 3.8 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt -r requirements-dev.txt + + - run: pip install pytest-github-actions-annotate-failures + + - run: pytest --cov=lambda --cov-report=term-missing --cov-report=xml --cov-branch tests + + - name: Report coverage + uses: codecov/codecov-action@v2 + with: + fail_ci_if_error: true diff --git a/.gitignore b/.gitignore index 073fc0c8..0b0f38c6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,27 @@ +# Python *.pyc +/venv + +# IDE +.idea + +# OS .DS_Store -/lambda/.chalice -/.idea + +# Tools .terraform -/venv -/pkg /terraform/providers.tf -bucket_map.yaml terraform.tfstate* *.tfvars +/lambda/.chalice +.coverage + +# Config +bucket_map.yaml +Makefile.config + +# Other +/pkg build/requirements_dev.txt build/tea-dev.Dockerfile +dist diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 32d86326..00000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "rain-api-core"] - path = rain-api-core - url = git@github.com:asfadmin/rain-api-core.git diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..6f62d874 --- /dev/null +++ b/Makefile @@ -0,0 +1,252 @@ +SOURCES := \ + lambda/app.py \ + lambda/tea_bumper.py \ + lambda/update_lambda.py + +RESOURCES := $(wildcard lambda/templates/*) +TERRAFORM := $(wildcard terraform/*) + +# Output directory +DIR := dist +EMPTY := $(DIR)/empty +# Temporary artifacts +DIST_SOURCES := $(SOURCES:lambda/%=$(DIR)/code/%) +DIST_RESOURCES := $(RESOURCES:lambda/%=$(DIR)/code/%) +DIST_TERRAFORM := $(TERRAFORM:terraform/%=$(DIR)/terraform/%) + +BUCKET_MAP_OBJECT_KEY := DEFAULT + +DATE := $(shell date -u "+%b %d %Y, %T %Z") +DATE_SHORT := $(shell date -u "+%Y%m%dT%H%M%S") +BUILD_ID := $(shell git rev-parse --short HEAD 2>/dev/null || echo "SNAPSHOT") + +DOCKER := docker +# On Linux we need to do a bit of userid finagling so that the output files +# end up being owned by us and not by root. On Mac this works out of the box. 
+DOCKER_USER_ARG := --user "$(shell id -u):$(shell id -g)" +DOCKER_COMMAND := $(DOCKER) run --rm $(DOCKER_USER_ARG) -v "$$PWD":/var/task lambci/lambda:build-python3.8 + +##################### +# Deployment Config # +##################### + +# A tag to distinguish between artifacts of the same name. The tag should be +# different for each build. +S3_ARTIFACT_TAG = $(DATE_SHORT) + + +# Include custom configuration +Makefile.config: + @echo + @echo "It looks like you are building TEA for the first time." + @echo "Please review the configuration in '$@' and run Make again." + @echo + @cp Makefile.config.example $@ + @exit 1 + +include Makefile.config + + +.DEFAULT_GOAL := all +.PHONY: all +all: build ; + +############################## +# Local building/development # +############################## + +# Build everything +.PHONY: build +build: \ + $(DIR)/thin-egress-app-code.zip \ + $(DIR)/thin-egress-app-dependencies.zip \ + $(DIR)/thin-egress-app.yaml \ + $(DIR)/thin-egress-app-terraform.zip + +# Build individual components +.PHONY: dependencies +dependencies: $(DIR)/thin-egress-app-dependencies.zip + @echo "Built dependency layer for version ${BUILD_ID}" + +.PHONY: code +code: $(DIR)/thin-egress-app-code.zip + @echo "Built code for version ${BUILD_ID}" + +.PHONY: yaml +yaml: $(DIR)/thin-egress-app.yaml + @echo "Built CloudFormation template for version ${BUILD_ID}" + +.PHONY: terraform +terraform: $(DIR)/thin-egress-app-terraform.zip + @echo "Build Terraform zip file for version ${BUILD_ID}" + +.PHONY: clean +clean: + rm -rf $(DIR) + +$(DIR)/thin-egress-app-dependencies.zip: requirements.txt + rm -rf $(DIR)/python + @mkdir -p $(DIR)/python + $(DOCKER_COMMAND) build/dependency_builder.sh "$(DIR)/thin-egress-app-dependencies.zip" "$(DIR)" + +.SECONDARY: $(DIST_RESOURCES) +$(DIST_RESOURCES): $(DIR)/code/%: lambda/% + @mkdir -p $(@D) + cp $< $@ + +.SECONDARY: $(DIST_SOURCES) +$(DIST_SOURCES): $(DIR)/code/%: lambda/% + @mkdir -p $(@D) + cp $< $@ + python3 scripts/sed.py -i $@ "" "${BUILD_ID}" + +$(DIR)/thin-egress-app-code.zip: $(DIST_SOURCES) $(DIST_RESOURCES) + @mkdir -p $(DIR)/code + cd $(DIR)/code && zip -r ../thin-egress-app-code.zip . 
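+# Stage the default bucket map (copied from config/bucket-map-template.yaml) for upload by the deploy-bucket-map target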
+ +$(DIR)/bucket-map.yaml: + cp config/bucket-map-template.yaml $@ + +$(DIR)/thin-egress-app.yaml: cloudformation/thin-egress-app.yaml + @mkdir -p $(DIR) + cp cloudformation/thin-egress-app.yaml $(DIR)/thin-egress-app.yaml +ifdef CF_DEFAULT_CODE_BUCKET + python3 scripts/sed.py -i $(DIR)/thin-egress-app.yaml "asf.public.code" "${CF_DEFAULT_CODE_BUCKET}" +endif + python3 scripts/sed.py -i $(DIR)/thin-egress-app.yaml "" "${CF_DEFAULT_DEPENDENCY_ARCHIVE_KEY}" + python3 scripts/sed.py -i $(DIR)/thin-egress-app.yaml "" "${CF_DEFAULT_CODE_ARCHIVE_KEY}" + python3 scripts/sed.py -i $(DIR)/thin-egress-app.yaml "" "${CF_BUILD_VERSION}" + python3 scripts/sed.py -i $(DIR)/thin-egress-app.yaml "^Description:.*" 'Description: "${CF_DESCRIPTION}"' + +.SECONDARY: $(DIST_TERRAFORM) +$(DIST_TERRAFORM): $(DIR)/%: % + @mkdir -p $(@D) + cp $< $@ + +$(DIR)/thin-egress-app-terraform.zip: \ + $(DIR)/thin-egress-app-code.zip \ + $(DIR)/thin-egress-app-dependencies.zip \ + $(DIR)/thin-egress-app.yaml \ + $(DIST_TERRAFORM) + @mkdir -p $(DIR)/terraform + cp $(DIR)/thin-egress-app-code.zip $(DIR)/terraform/lambda.zip + cp $(DIR)/thin-egress-app-dependencies.zip $(DIR)/terraform/dependencylayer.zip + cp $(DIR)/thin-egress-app.yaml $(DIR)/terraform/thin-egress-app.yaml + cd $(DIR)/terraform && zip ../thin-egress-app-terraform.zip \ + *.tf \ + thin-egress-app.yaml \ + lambda.zip \ + dependencylayer.zip + +############## +# Deployment # +############## + +# Empty targets so we don't re-deploy stuff that is unchanged. Technically they +# might not be empty, but their purpose is the same. +# https://www.gnu.org/software/make/manual/html_node/Empty-Targets.html + +$(EMPTY)/.deploy-dependencies: $(DIR)/thin-egress-app-dependencies.zip + @echo "Deploying dependencies" + $(AWS) s3 cp --profile=$(AWS_PROFILE) $< \ + s3://$(CODE_BUCKET)/$(CODE_PREFIX)dependencies-$(S3_ARTIFACT_TAG).zip + + @mkdir -p $(EMPTY) + @echo $(S3_ARTIFACT_TAG) > $(EMPTY)/.deploy-dependencies + +$(EMPTY)/.deploy-code: $(DIR)/thin-egress-app-code.zip + @echo "Deploying code" + $(AWS) s3 cp --profile=$(AWS_PROFILE) \ + $(DIR)/thin-egress-app-code.zip \ + s3://$(CODE_BUCKET)/$(CODE_PREFIX)code-$(S3_ARTIFACT_TAG).zip + + @mkdir -p $(EMPTY) + @echo $(S3_ARTIFACT_TAG) > $(EMPTY)/.deploy-code + +$(EMPTY)/.deploy-bucket-map: $(DIR)/bucket-map.yaml + @echo "Deploying bucket map" + $(AWS) s3 cp --profile=$(AWS_PROFILE) $< \ + s3://$(CONFIG_BUCKET)/$(CONFIG_PREFIX)bucket-map-$(S3_ARTIFACT_TAG).yaml + + @mkdir -p $(EMPTY) + @echo $(S3_ARTIFACT_TAG) > $(EMPTY)/.deploy-bucket-map + +# Optionally upload a bucket map if the user hasn't specified one +BUCKET_MAP_REQUIREMENT := +ifeq ($(BUCKET_MAP_OBJECT_KEY), DEFAULT) +BUCKET_MAP_REQUIREMENT := $(EMPTY)/.deploy-bucket-map +BUCKET_MAP_OBJECT_KEY = $(CONFIG_PREFIX)bucket-map-`cat $(EMPTY)/.deploy-bucket-map`.yaml +endif + +$(EMPTY)/.deploy-stack: $(DIR)/thin-egress-app.yaml $(EMPTY)/.deploy-dependencies $(EMPTY)/.deploy-code $(BUCKET_MAP_REQUIREMENT) + @echo "Deploying stack '$(STACK_NAME)'" + $(AWS) cloudformation deploy \ + --profile=$(AWS_PROFILE) \ + --stack-name $(STACK_NAME) \ + --template-file $(DIR)/thin-egress-app.yaml \ + --capabilities CAPABILITY_NAMED_IAM \ + --parameter-overrides \ + LambdaCodeS3Key="$(CODE_PREFIX)code-`cat $(EMPTY)/.deploy-code`.zip" \ + LambdaCodeDependencyArchive="$(CODE_PREFIX)dependencies-`cat $(EMPTY)/.deploy-dependencies`.zip" \ + BucketMapFile="$(BUCKET_MAP_OBJECT_KEY)" \ + URSAuthCredsSecretName=$(URS_CREDS_SECRET_NAME) \ + AuthBaseUrl=$(URS_URL) \ + ConfigBucket=$(CONFIG_BUCKET) \ + 
LambdaCodeS3Bucket=$(CODE_BUCKET) \ + PermissionsBoundaryName= \ + PublicBucketsFile="" \ + PrivateBucketsFile="" \ + BucketnamePrefix=$(BUCKETNAME_PREFIX) \ + DownloadRoleArn="" \ + DownloadRoleInRegionArn="" \ + HtmlTemplateDir= \ + StageName=API \ + Loglevel=DEBUG \ + Logtype=json \ + Maturity=DEV\ + PrivateVPC= \ + VPCSecurityGroupIDs= \ + VPCSubnetIDs= \ + EnableApiGatewayLogToCloudWatch="False" \ + DomainName=$(DOMAIN_NAME-"") \ + DomainCertArn=$(DOMAIN_CERT_ARN-"") \ + CookieDomain=$(COOKIE_DOMAIN-"") \ + LambdaTimeout=$(LAMBDA_TIMEOUT) \ + LambdaMemory=$(LAMBDA_MEMORY) \ + JwtAlgo=$(JWTALGO) \ + JwtKeySecretName=$(JWT_KEY_SECRET_NAME) \ + UseReverseBucketMap="False" \ + UseCorsCookieDomain="False" + + @mkdir -p $(EMPTY) + @touch $(EMPTY)/.deploy-stack + +# Deploy everything +.PHONY: deploy +deploy: deploy-code deploy-dependencies deploy-stack + +# Deploy individual components +.PHONY: deploy-code +deploy-code: $(EMPTY)/.deploy-code + +.PHONY: deploy-dependencies +deploy-dependencies: $(EMPTY)/.deploy-dependencies + +.PHONY: deploy-bucket-map +deploy-bucket-map: $(EMPTY)/.deploy-bucket-map + +.PHONY: deploy-stack +deploy-stack: $(EMPTY)/.deploy-stack + +# Remove the empty target files so that aws commands will be run again +.PHONY: cleandeploy +cleandeploy: + rm -r $(EMPTY) + +############### +# Development # +############### + +.PHONY: test +test: + pytest --cov=lambda --cov-report=term-missing --cov-branch tests diff --git a/Makefile.config.example b/Makefile.config.example new file mode 100644 index 00000000..66e23bdb --- /dev/null +++ b/Makefile.config.example @@ -0,0 +1,78 @@ +# ================================= READ ME ================================= # +# For building artifacts (`make build`), no configuration is necessary. +# For deploying a dev stack (`make deploy`), all variables marked with +# +# REQUIRED_FOR_DEPLOY +# +# Must be replaced with values specific to your environment. +# =========================================================================== # + +############## +# TEA config # +############## + +# The name of the CloudFormation stack to deploy to. +# For development, this should be a constant value. If code is changed the +# stack will be updated with the new zip files. +STACK_NAME := thin-egress-app-dev + +# S3 bucket that will contain the build artifacts +CODE_BUCKET := REQUIRED_FOR_DEPLOY! +# Object prefix for artifacts in the code bucket +CODE_PREFIX := $(STACK_NAME)/ + +# S3 bucket that will contain the configuration files such as the bucket map +CONFIG_BUCKET := REQUIRED_FOR_DEPLOY! +# Object prefix for artifacts in the config bucket +CONFIG_PREFIX := $(STACK_NAME)/ +# Uncomment the following line to use an existing bucket map rather than uploading one +# BUCKET_MAP_OBJECT_KEY := + +# String that will be prepended to bucket names after looking them up in the bucket map +BUCKETNAME_PREFIX := + +# These secrets are required but not managed by the Makefile. +# For full deployments use `build/deploy.sh` +JWT_KEY_SECRET_NAME := REQUIRED_FOR_DEPLOY! +URS_CREDS_SECRET_NAME := REQUIRED_FOR_DEPLOY! 
+ +# EarthData Login base URL +URS_URL := https://uat.urs.earthdata.nasa.gov + +#################################### +# CloudFormation Template Defaults # +#################################### +# These options refer specifically to the CloudFormation yaml file itself + +CF_DESCRIPTION := TEA snapshot, version: ${BUILD_ID} built ${DATE} +CF_BUILD_VERSION := $(BUILD_ID) + +# Uncomment the following line to use a different default value for the code bucket +# CF_DEFAULT_CODE_BUCKET := asf.public.code +# Default S3 object key for the dependency layer +CF_DEFAULT_DEPENDENCY_ARCHIVE_KEY := +# Default S3 object key for the lambda code +CF_DEFAULT_CODE_ARCHIVE_KEY := + +################## +# AWS cli config # +################## + +# AWS cli binary name +AWS := aws + +# AWS profile to use +AWS_PROFILE := default + +######## +# Misc # +######## +# Uncomment the following line to build the dependencies on the host (without docker) +# DOCKER_COMMAND := + +# Supported JWT algorithm +JWTALGO := RS256 + +# AWS Lambda +LAMBDA_TIMEOUT := 6 +LAMBDA_MEMORY := 128 diff --git a/README.MD b/README.MD index 23bb0ee7..796eb506 100644 --- a/README.MD +++ b/README.MD @@ -5,13 +5,14 @@ ![Last Good Build](https://img.shields.io/endpoint.svg?url=https%3A%2F%2Fs3.amazonaws.com%2Fasf.public.code%2Fthin-egress-app%2Flastgoodbuild.json) [![Last Release](https://img.shields.io/endpoint.svg?url=https%3A%2F%2Fs3.amazonaws.com%2Fasf.public.code%2Fthin-egress-app%2Flastrelease.json)]((https://github.com/asfadmin/thin-egress-app/releases/latest)) ![Test Results](https://img.shields.io/endpoint.svg?url=https%3A%2F%2Fs3.amazonaws.com%2Fasf.public.code%2Fthin-egress-app%2Ftestresults.json) +[![codecov](https://codecov.io/gh/asfadmin/thin-egress-app/branch/master/graph/badge.svg?token=Jd5l4IVpkM)](https://codecov.io/gh/asfadmin/thin-egress-app) [![Safety Badge](https://pyup.io/repos/github/asfadmin/thin-egress-app/shield.svg?t=1559317620375)](https://pyup.io/account/repos/github/asfadmin/thin-egress-app/) [![CodeFactor](https://www.codefactor.io/repository/github/asfadmin/thin-egress-app/badge)](https://www.codefactor.io/repository/github/asfadmin/thin-egress-app) -If you want the fastest path between 0 and S3 distribution, see the [TL;DR](#tldr) section -for a quick and easy TEA bootstrapping script. +If you want the fastest path between 0 and S3 distribution, see the [TL;DR](#tldr) section +for a quick and easy TEA bootstrapping script. -## Table of Contents: +## Table of Contents: * [History of TEA](#history-of-tea) - A Brief History of where TEA came from * [Purpose of TEA](#purpose-of-tea) - What TEA is and isn't * [TEA Vision](#tea-vision) @@ -19,14 +20,14 @@ for a quick and easy TEA bootstrapping script. * [System Architecture](#system-architecture) - Components and technologies in TEA * [External Integrations](#external-integrations) - How TEA related to other things * [Cumulus](#cumulus) - * [EDL](#edl) + * [EDL](#edl) * [EDC](#edc) * [Build Process](#build-process) - Where the code comes from * [Code Quality](#code-quality) * [Linting](#linting) * [CVE](#cve) * [Static Code Analysis](#static-code-analysis) - * [Automated Testing](#automated-testing) + * [Automated Testing](#automated-testing) * [Roll-Your-Own](#roll-your-own) * [Dependency Layer](#dependency-layer) * [Packaging up the egress code](#packaging-up-the-egress-code) @@ -58,6 +59,7 @@ for a quick and easy TEA bootstrapping script. 
* [Using awscli](#using-awscli) * [From Terraform](#from-terraform) * [From Cumulus](#from-cumulus) + * [For development](#for-development) * [Post deployment procedures](#post-deployment-procedures) - What do to AFTER Deploying * [Update EDL](#update-edl) * [Validating a Deployment](#validating-a-deployment) @@ -77,54 +79,54 @@ for a quick and easy TEA bootstrapping script. * [Requesting features](#requesting-features) * [TL;DR](#tldr) - We understand not everyone likes to read docs. We can do it for you! * [Walk me through this!](#walk-me-through-this) - + ## History of TEA -TEA was originally designed as a lightweight S3 distribution application with Earthdata Login +TEA was originally designed as a lightweight S3 distribution application with Earthdata Login (EDL) support meant to help test and evaluate the (then) forethcoming NGAP Enterprise Egress platform. TEA leveraged much of the design and features of ASF's original full-featured s3 distribution app, without any of the throttling or egress tracking capabilities. Without the -need to do throttling, TEA ditched docker/nginx/ECS in favor of the lighter, more nimble -Lambda + API Gateway architecture. +need to do throttling, TEA ditched docker/nginx/ECS in favor of the lighter, more nimble +Lambda + API Gateway architecture. [⬆ Return to Table of Contents](#table-of-contents) -## Purpose of TEA +## Purpose of TEA -At the most fundamental level, TEA is intended to be a FULLY Earthdata Cloud (EDC) compliant -application to enable distribution of EOSDIS data from Amazon S3 while ensuring data -providers have all the tools, controls and metrics required for ensuring data distribution -restriction and reporting compliance. TEA Distribution supports only static S3 data, and is +At the most fundamental level, TEA is intended to be a FULLY Earthdata Cloud (EDC) compliant +application to enable distribution of EOSDIS data from Amazon S3 while ensuring data +providers have all the tools, controls and metrics required for ensuring data distribution +restriction and reporting compliance. TEA Distribution supports only static S3 data, and is not intended for service-style or dynamic data distribution. This documentation is intended to support stand-alone TEA deployments, and while it may be a valuable tool for TEA deployments provisioned through Cumulus, we suggest you visit cumulus -documentation for guidance on using and customizing TEA from within cumulus. +documentation for guidance on using and customizing TEA from within cumulus. -### TEA Vision +### TEA Vision -We the **TEA**m Strive to create the simplest, most reliable, most feature-rich S3 +We the **TEA**m Strive to create the simplest, most reliable, most feature-rich S3 distribution app. Our intent is not to provide EVERY feature imaginable, but to maintain the -**THIN** core of broadly applicable features. We want to continue to engage the user -community to improve the feature set while staunchly resisting the siren song of feature -bloat and one-off special ponies. +**THIN** core of broadly applicable features. We want to continue to engage the user +community to improve the feature set while staunchly resisting the siren song of feature +bloat and one-off special ponies. ### Principles of TEA design * Keep it simple - No special ponies! * Remove unwanted/unused code * Write *clean* code -* Engage and support the community +* Engage and support the community * Strive for 10% community provided code base - we ❤️ PR's! 
[⬆ Return to Table of Contents](#table-of-contents) -## System Architecture +## System Architecture In the shortest possible explanation, TEA is a [Python Chalice](https://github.com/aws/chalice) -application that leverages AWS Lambda and API Gateway to create Pre-signed S3 URLs to -facilitate user downloads over https via CloudFront. +application that leverages AWS Lambda and API Gateway to create Pre-signed S3 URLs to +facilitate user downloads over https via CloudFront. ![TEA Design](https://github.com/asfadmin/thin-egress-app/raw/devel/tea.png) @@ -133,29 +135,29 @@ facilitate user downloads over https via CloudFront. ## External Integrations TEA cannot really operate in a vacuum. TEA is part of what might be considered the Earthdata -ecosystem. +ecosystem. ### Cumulus TEA can be considered the "official" distribution application of Cumulus, and while there is -a symbiotic relationship, TEA is an add-on to cumulus rather than an extension. We suggest -utilizing the applicable Cumulus documentation for tightly coupled TEA+Cumulus deployments. - +a symbiotic relationship, TEA is an add-on to cumulus rather than an extension. We suggest +utilizing the applicable Cumulus documentation for tightly coupled TEA+Cumulus deployments. + ### EDL -The Earthdata Login OAUTH service is a requirement for distributing data where user activity -tracking and reporting is required. Before deploying TEA, it is important to have access to, -or [create a new EDL Application](https://urs.earthdata.nasa.gov/apps/new). Creating and -administrating an EDL Application requires the EDL App Admin role, which must be requested -through the feedback modal in the Earthdata login header menu. +The Earthdata Login OAUTH service is a requirement for distributing data where user activity +tracking and reporting is required. Before deploying TEA, it is important to have access to, +or [create a new EDL Application](https://urs.earthdata.nasa.gov/apps/new). Creating and +administrating an EDL Application requires the EDL App Admin role, which must be requested +through the feedback modal in the Earthdata login header menu. -### EDC +### EDC -While TEA can be deployed outside of the Earthdata Cloud, it is designed and intended to be +While TEA can be deployed outside of the Earthdata Cloud, it is designed and intended to be used within the Earthdata Cloud environment. While we assume you'll be deploying to the EDC, we cannot provide assistance in setting up or configuring access to EDC. However, when there are steps or considerations that are required for integration into EDC, we will help ensure -you have all the information you need to do the right thing. +you have all the information you need to do the right thing. [⬆ Return to Table of Contents](#table-of-contents) @@ -166,121 +168,159 @@ you have all the information you need to do the right thing. ### Code Quality -TEA leverages several open source platforms along with mandatory peer code review to help +TEA leverages several open source platforms along with mandatory peer code review to help ensure the code base is clean, stable, and secure #### Linting -CodeFactor.io github integrations provide merge request blocking based on appropriate levels -of code linting. +CodeFactor.io github integrations provide merge request blocking based on appropriate levels +of code linting. -#### CVE +#### CVE -PyUp, Snyk and Whitesource all provide Common Vulnerabilities and Exposures (CVE) Analysis. 
-Emergent CVE fixes are automatically pushed to the TEA repo as merge requests and merged into +PyUp, Snyk and Whitesource all provide Common Vulnerabilities and Exposures (CVE) Analysis. +Emergent CVE fixes are automatically pushed to the TEA repo as merge requests and merged into the repo as soon as possible. #### Static Code Analysis -CodeFactor.io and Whitesource provide merge request block based on Static Code Analysis to +CodeFactor.io and Whitesource provide merge request block based on Static Code Analysis to help ensure our code is secure and safe to run. ### Automated Testing -Along with full-project human-executed manual regression testing, TEA also uses the Python +Along with full-project human-executed manual regression testing, TEA also uses the Python unittest framework (admittedly somewhat unconventionally) to run comprehensive integration -tests on freshly deployed stacks for every build. All test failures prevent successful -builds and code cannot be released without all tests completing successfully. - -### Roll-Your-Own +tests on freshly deployed stacks for every build. All test failures prevent successful +builds and code cannot be released without all tests completing successfully. -If you prefer to roll your own zip for the lambda code: +There is also a comprehensive unit test suite written using pytest. You can run it in a virtual +environment like this: -#### Dependency Layer -We've found that if the dependency layer isn't built in an `amazonlinux:2` environment, the JWT crypto doesn't work. Here are instructions for gathering and packaging the dependencies in an `amazonlinux` docker container. ```bash +# Create virtual environment +python3 -m venv .venv +# Activate virtual environment +source .venv/bin/activate -# Make up a filename for code archive: -DEPENDENCYLAYERFILENAME=tea_depencency_layer.zip +# Install requirements to the virtual environment. +# Make sure you see '(.venv)' in your shell prompt before running this! +pip install -r requirements.txt -r requirements-dev.txt -# get the repo -git clone https://github.com/asfadmin/thin-egress-app -cd thin-egress-app +# Run the tests +make test +``` + +### Roll-Your-Own -# copy requirements to a directory that is mounted into the docker container -cp rain-api-core/requirements.txt lambda/requirements_rain-api-core.txt -cp lambda/requirements.txt lambda/requirements_tea.txt -cp build/dependency_builder.sh lambda/ +If you prefer to roll your own zip for the lambda code you can use our Makefile to build it +from source in a bash environment. You will need to have a few tools installed for this to work: -docker run --rm -v lambda/:/depbuild/in -v ./:/depbuild/out -e "ZIPFILENAME=${DEPENDENCYLAYERFILENAME}" amazonlinux:2 bash /depbuild/in/dependency_builder.sh +- `zip` for creating zip files +- `docker` for building the dependency layer +- `awscli` (optional) for deploying to AWS -# Upload to S3 -aws s3 cp --profile=default ./${DEPENDENCYLAYERFILENAME} s3://${CODE_BUCKET}/ +If you're building the dependency layer locally (generally not recommended especially not on MacOS) +you will also need: +- `python3.8` with `pip` +- `git` for installing rain-api-core + +All build artifacts are created into the `dist` directory. 
+```bash +# Clean up from previous builds +make clean + +# Creates the lambda code zip: dist/thin-egress-app-code.zip +make code +# Creates the dependnecy layer zip: dist/thin-egress-app-dependencies.zip +make dependencies +# Creates the CloudFormation template: dist/thin-egress-app.yaml +make yaml +# Creates the Terraform zip file needed by cumulus +make terraform + +# Creates all of the above +make build ``` -#### Packaging up the egress code: +You can deploy these artifacts to a development stack with +```bash +make deploy +``` +*Note: You can run this command as many times as you like, make will automatically detect +changes made to the source code and rebuild/update the stack as needed* + +#### Configuration +After you run any `make` command, you will notice a file called `Makefile.config`. This contains +any Make configuration variables that you may wish to tweak for your development purposes. If you +need to override any additional variables from the Makefile you can put them in this file. +Alternatively, if you just need to make a one off change, `make` lets you set variables on the +command line: ```bash +make deploy STACK_NAME=thin-egress-app-temp AWS_PROFILE=development +``` -# Make up a filename for code archive: -CODE_ARCHIVE_FILENAME=thin-egress-app-code.zip +You will need to change any variables that have the value `REQUIRED!` before you can run +`make deploy`. +#### Dependency Layer + +```bash # get the repo git clone https://github.com/asfadmin/thin-egress-app cd thin-egress-app -# Create a scratch directory in which to confine the required module +# Build the zip file +make dependencies -# To make the archive smaller, you can cull unnecessary packages and files from the directory. The particulars -# are beyond the scope of this documentation. - -# Create the zip archive and put the required modules in -zip -r9 ./${CODE_ARCHIVE_FILENAME} ./lambda +# Upload to S3 +aws s3 cp --profile=default dist/thin-egress-app-dependencies.zip s3://${CODE_BUCKET}/ +``` -# Add the egress python code -zip -g ./${CODE_ARCHIVE_FILENAME} ./app.py +*Note: If you are expecting the `cryptography` dependency to be built from source, you may need to +run these commands in an `amazonlinux:2` docker container. However, as `cryptography` distributes +`manylinux` wheels, this should not be necessary* -# Add the update lambda -zip -g ./${CODE_ARCHIVE_FILENAME} ./update_lambda.py +#### Packaging up the egress code: -# Add the html templates -zip -g -r2 ./${CODE_ARCHIVE_FILENAME} ./templates +```bash +# Get the repo +git clone https://github.com/asfadmin/thin-egress-app +cd thin-egress-app -# Add the common code: -cd rain_api_core -zip -g -r9 ./${CODE_ARCHIVE_FILENAME} ./rain_api_core -cd .. +# Build the zip file +make code # Upload to S3 -aws s3 cp --profile=default ./${CODE_ARCHIVE_FILENAME} s3://${CODE_BUCKET}/ - +aws s3 cp --profile=default dist/thin-egress-app-code.zip s3://${CODE_BUCKET}/ ``` [⬆ Return to Table of Contents](#table-of-contents) -## Getting TEA Code +## Getting TEA Code -Major release documentation can be found -[in github](https://github.com/asfadmin/thin-egress-app/releases/latest). +Major release documentation can be found +[in github](https://github.com/asfadmin/thin-egress-app/releases/latest). 
### github.com Raw TEA code is broken into two github repos: - * [thin-egress-app](https://github.com/asfadmin/thin-egress-app/) - - Chalice App, infrastructure-as-code, build scaffolding - * [rain-api-core](https://github.com/asfadmin/rain-api-core/) - + * [thin-egress-app](https://github.com/asfadmin/thin-egress-app/) - + Chalice App, infrastructure-as-code, build scaffolding + * [rain-api-core](https://github.com/asfadmin/rain-api-core/) - EDL, AWS, Session Management helper functions ### s3://asf.public.code -All public releases of ASF code are made available via the public bucket +All public releases of ASF code are made available via the public bucket [asf.public.code](https://s3.amazonaws.com/asf.public.code/index.html). Each build has 4 files: -* **tea-cloudformation-build.#.yaml** - CloudFormation template +* **tea-cloudformation-build.#.yaml** - CloudFormation template * **tea-code-build.#.zip** - Lambda App Python Code * **tea-dependencylayer-build.#.zip** - Lambda layer Python Dependency Code -* **tea-terraform-build.#.zip** - TEA as a Terraform module (For cumulus!) +* **tea-terraform-build.#.zip** - TEA as a Terraform module (For cumulus!) [⬆ Return to Table of Contents](#table-of-contents) @@ -288,42 +328,42 @@ files: ### Bucket Mapping -At the heart of TEA is the concept of the Bucket Map. This YAML file tells TEA how to -map URLs to buckets and how to control access to data in buckets. Mapping depths are -arbitrary and dynamically account for object path prefixes. Bucket maps are case +At the heart of TEA is the concept of the Bucket Map. This YAML file tells TEA how to +map URLs to buckets and how to control access to data in buckets. Mapping depths are +arbitrary and dynamically account for object path prefixes. Bucket maps are case sensitive! -When working with a bucket map, it is important to remember that bucket names are -relative to the optional `bucket prefix` TEA parameter. If a `bucket prefix` is -supplied, that value is prepended to any bucket name. This optional feature is intended -to allow bucket maps to be used in multiple maturities (e.g. Sandbox, SIT, UAT, Prod) -where bucket names are consistent across maturities given a specific bucket prefix. +When working with a bucket map, it is important to remember that bucket names are +relative to the optional `bucket prefix` TEA parameter. If a `bucket prefix` is +supplied, that value is prepended to any bucket name. This optional feature is intended +to allow bucket maps to be used in multiple maturities (e.g. Sandbox, SIT, UAT, Prod) +where bucket names are consistent across maturities given a specific bucket prefix. Assume the following bucket map: ```yaml MAP: path1: bucket-path-1 - + path2: path2a: bucket-path-2a - path2b: bucket-path-2b - + path2b: bucket-path-2b + path3: path3a: - path3ai: bucket-path-3ai + path3ai: bucket-path-3ai ``` You can derive the following object -> URL mappings: ``` -s3://bucket-path-1/object.txt => https://[APP-BASE-URL]/path1/object.txt +s3://bucket-path-1/object.txt => https://[APP-BASE-URL]/path1/object.txt s3://bucket-path-2a/file.ext => https://[APP-BASE-URL]/path2/path2a/file.ext s3://bucket-path-2b/low-res/browse.jpg => https://[APP-BASE-URL]/path2/path2b/low-res/browse.jpg s3://bucket-path-3ai/some-product.h5 => https://[APP-BASE-URL]/path3/path3a/path3ai/some-product.h5 ``` -It is **NOT** possible to have distribution at the app root. 
That is, -`s3://bucket-path-1/object.txt` cannot be configured to be distributed at +It is **NOT** possible to have distribution at the app root. That is, +`s3://bucket-path-1/object.txt` cannot be configured to be distributed at `https://[APP-BASE-URL]/object.txt`. #### Custom Headers @@ -340,7 +380,7 @@ MAP: ### EDL Access Control -By default, all buckets are assumed to require a user to log into Earthdata Login to +By default, all buckets are assumed to require a user to log into Earthdata Login to download data. However, there are two options to change that behavior #### Public Buckets @@ -350,32 +390,32 @@ Buckets can be made public by adding a `PUBLIC_BUCKETS` block: ```yaml MAP: browse: browse-bucket - -PUBLIC_BUCKETS: + +PUBLIC_BUCKETS: browse-bucket: "Internal only Comment about browse being public" -``` +``` In the above example, accessing `https://[APP-BASE-URL]/browse/some-browse-image.jpg` -would not require EDL authentication, however, if a user was already logged in, the -downloads would be tagged with that users EDL User ID. +would not require EDL authentication, however, if a user was already logged in, the +downloads would be tagged with that users EDL User ID. #### Private Buckets -TEA download can also be restricted to users who belonging to an EDL Application -User Group. This allows App and Data owners to specify a discrete list of specially +TEA download can also be restricted to users who belonging to an EDL Application +User Group. This allows App and Data owners to specify a discrete list of specially approved users who can download data. ```yaml MAP: pre-commission: pre-comission-data-bucket - -PRIVATE_BUCKETS: - pre-commission-data-bucket: + +PRIVATE_BUCKETS: + pre-commission-data-bucket: - internal_users - external_team -``` +``` -In the example above, TEA will ensure that a user attempt to download +In the example above, TEA will ensure that a user attempt to download `https://[APP-BASE-URL]/pre-commission/not-yet-public.zip` belongs to **either** the App User Group `internal_users` or `external_team`. Users who are not in one or both of those groups will not be granted a download. @@ -388,46 +428,46 @@ object prefixing to control access: ```yaml MAP: data-bucket: data-bucket - + PUBLIC_BUCKETS: data-bucket/browse: "Browse image" - -PRIVATE_BUCKETS: - data-bucket/pre-commission-data: + +PRIVATE_BUCKETS: + data-bucket/pre-commission-data: - internal_users - external_team -``` +``` In the above example, while access data in `data-bucket` requires simple auth, -accessing an object with the prefix `browse/` will require NO auth, and -`pre-commission-data/` will require EDL App group access as specified. +accessing an object with the prefix `browse/` will require NO auth, and +`pre-commission-data/` will require EDL App group access as specified. ### Custom Templating -You may optionally create your own [jinja2](http://jinja.pocoo.org/docs/2.10/) html -templates. If no custom templates are supplied in the `HtmlTemplateDir` subdirectory -of the `ConfigBucket` bucket, ultra-generic (and a little annoying!) +You may optionally create your own [jinja2](http://jinja.pocoo.org/docs/2.10/) html +templates. If no custom templates are supplied in the `HtmlTemplateDir` subdirectory +of the `ConfigBucket` bucket, ultra-generic (and a little annoying!) [default templates](https://github.com/asfadmin/thin-egress-app/tree/devel/lambda/templates) -are used. +are used. **base.html** -This is the base template. +This is the base template. 
-Blocks: +Blocks: * `pagetitle`: Gets inserted inside the `` element * `content`: Content payload fed into the template. **root.html** Child template. Gets called by `/` and `/logout` for 200 responses. -Variables: +Variables: * `title`: page title * `URS_URL`: used to make the login link * `STAGE`: used to make a URL back to the egress app * `profile`: in the default template, `profile.first_name` and `profile.last_name` are used to greet a logged-in user. The entire profile dict is available to the template. * `contentstring`: any text can go here - -**error.html** + +**error.html** Child template that gets called when various 400 type errors happen. Variables: @@ -435,26 +475,26 @@ Variables: * `status_code`: http status code goes here * `contentstring`: any text can go here -**profile.html** +**profile.html** Child template that displays profile info. Only used for debugging in dev. ### Shared Token Support -TEA can accept a shared EDL Token as an Authorization (Bearer Token) method. To -enable this behavior, EDL Apps (Service + TEA) must belong to a shared EDL App -Group. Processing a shared token is temporally expensive. After the initial -request, subsequent Service->TEA data requests should reuse cookies. EULA -enforcement is preserved with shared tokens. +TEA can accept a shared EDL Token as an Authorization (Bearer Token) method. To +enable this behavior, EDL Apps (Service + TEA) must belong to a shared EDL App +Group. Processing a shared token is temporally expensive. After the initial +request, subsequent Service->TEA data requests should reuse cookies. EULA +enforcement is preserved with shared tokens. ![TEA](https://github.com/asfadmin/thin-egress-app/blob/devel/harmony-chain.png) ### Dynamic In-Region IP CIDR Updates -TEA deploys a Lambda, triggered by a subscription to the SNS Topic +TEA deploys a Lambda, triggered by a subscription to the SNS Topic `arn::aws:sns:us-east-1:806199016981:AmazonIpSpaceChanged`, which downloads the -AWS Provided [ip-ranges.json](https://ip-ranges.amazonaws.com/ip-ranges.json) +AWS Provided [ip-ranges.json](https://ip-ranges.amazonaws.com/ip-ranges.json) file, parses out the IP CIDRs for the deploy region, and automatically updates -the In-Region IAM Role's Policy Document `condition` block. +the In-Region IAM Role's Policy Document `condition` block. This lambda is defined in [update_lambda.py](https://github.com/asfadmin/thin-egress-app/blob/master/lambda/update_lambda.py) @@ -464,9 +504,9 @@ This lambda is defined in ## Working inside EDC While TEA is primarily designed to be deployed inside EDC, below are some -important configurations to be aware of to make deploying to EDC Successful. +important configurations to be aware of to make deploying to EDC Successful. -### VPC and Networking +### VPC and Networking This bash script will give you all the parameters you'll need to deploy into EDC: @@ -482,8 +522,8 @@ echo "PrivateVPC=$VPCID; VPCSecurityGroupIDs=$SECURITYGROUP; VPCSubnetIDs=$SUBNE ### VPC Endpoints -It is also important to be aware that an API Gateway VPC Endpoint will need to -be setup prior to deployment. You can check to see if your account has the +It is also important to be aware that an API Gateway VPC Endpoint will need to +be setup prior to deployment. 
You can check to see if your account has the appropriate VPCE by running this command: ```bash @@ -493,18 +533,18 @@ aws $AWSENV ec2 describe-vpc-endpoints --query "VpcEndpoints[?(VpcId=='$VPCID' & ### Permission Boundaries When deploying into NGAP, it is important to supply a Permission Boundary. This -allows CloudFormation to create necessary IAM roles on your behalf. +allows CloudFormation to create necessary IAM roles on your behalf. ### Do I need a Bastion? -The answer is that it certainly helps. If your account does not have a bastion, -validating a deployment will be very difficult. +The answer is that it certainly helps. If your account does not have a bastion, +validating a deployment will be very difficult. [⬆ Return to Table of Contents](#table-of-contents) ## Deploying TEA -At its core, TEA uses a CloudFormation template to deploy all of its resources to +At its core, TEA uses a CloudFormation template to deploy all of its resources to AWS. Additionally, there is an available [Terraform module](#from-terraform) that can be used on its own, or as an add-on to Cumulus. @@ -516,10 +556,10 @@ Two secrets manager objects are required for TEA to function properly: To set up the URS Auth secret object, You'll need these parameters: -* `UrsId` can be found on your SSO application's URS home page as `Client ID`. -* `UrsAuth` is the `UID` for your URS SSO app and password separated by a colon. -You can create the value with the command below. See EDL's -[Request Authorization Code](https://urs.earthdata.nasa.gov/sso_client_impl) +* `UrsId` can be found on your SSO application's URS home page as `Client ID`. +* `UrsAuth` is the `UID` for your URS SSO app and password separated by a colon. +You can create the value with the command below. See EDL's +[Request Authorization Code](https://urs.earthdata.nasa.gov/sso_client_impl) documentation for more details. Encode the Creds @@ -528,7 +568,7 @@ UrsAuth=$(echo -n ":" | base64) UrsId="" ``` -Write the base64 encoded keys into a json file: +Write the base64 encoded keys into a json file: ```bash cat << EOL > urscreds.json { @@ -548,7 +588,7 @@ aws $AWSENV secretsmanager create-secret --name urs_creds_for_tea \ #### JWT Cookie Secret JWT secrets allow multiple TEA instances to encode and decode each others JWT -cookies. This helps reduce the number of times EDL Authentication needs to +cookies. This helps reduce the number of times EDL Authentication needs to happen. There are many ways to create JWS Secrets, here is one example: First, Create a key pair and b64 encode them: @@ -557,7 +597,7 @@ ssh-keygen -t rsa -b 4096 -m PEM -f ./jwtcookie.key rsa_priv_key=$(openssl base64 -in jwtcookie.key -A) rsa_pub_key=$(openssl base64 -in jwtcookie.key.pub -A) ``` -Put the base-64 encoded keys into a json file like so: +Put the base-64 encoded keys into a json file like so: ```bash cat << EOL > jwtkeys.json { @@ -576,16 +616,16 @@ aws $AWSENV secretsmanager create-secret --name jwt_secret_for_tea \ #### All Parameters * **App Settings**: - * `Loglevel` - How verbose the logging should be + * `Loglevel` - How verbose the logging should be * `Logtype` - How log entries are formed. Use json in conjunction with log analysis tools. Use flat for human debugging. 
* `Maturity` - Maturity, mostly for logs * `ConfigBucket` - S3 bucket where configuration files (bucket map, templates) are kept - * `HtmlTemplateDir` - (OPTIONAL) Subdirectory of `ConfigBucket` where templates can be found - * `StageName` - API Gateway Stage value + * `HtmlTemplateDir` - (OPTIONAL) Subdirectory of `ConfigBucket` where templates can be found + * `StageName` - API Gateway Stage value * `EnableApiGatewayLogToCloudWatch` - Whether or not API Gateway logs should be dumped * **Domain Settings**: * `DomainName` - (OPTIONAL) domain name as a user will access it (ie CloudFront, CNAME, etc) - * `CookieDomain` - (OPTIONAL) domain name value for minting cookies + * `CookieDomain` - (OPTIONAL) domain name value for minting cookies * `DomainCertArn` - (OPTIONAL) Arn to a AWS ACM SSL Cert for HTTPS access * `UseCorsCookieDomain` - If `True`, and `CookieDomain` is set, this enables CORS Response Headers * **Lambda Code**: @@ -601,42 +641,42 @@ aws $AWSENV secretsmanager create-secret --name jwt_secret_for_tea \ * `BucketnamePrefix` - (OPTIONAL) Bucket prefix value (see [Bucket Mapping](#bucket-mapping)) * `BucketMapFile` - bucket map YAML file (see [Bucket Mapping](#bucket-mapping)) * `UseReverseBucketMap` - Ignore this value! - * `DownloadRoleArn` - (OPTIONAL) Pre-created IAM Role for minting presigned urls + * `DownloadRoleArn` - (OPTIONAL) Pre-created IAM Role for minting presigned urls * Leave blank to create a new role for deployment - * `DownloadRoleInRegionArn` - (OPTIONAL) Pre-created IAM Role for minting IN-REGION presigned urls + * `DownloadRoleInRegionArn` - (OPTIONAL) Pre-created IAM Role for minting IN-REGION presigned urls * Leave blank to create a new role for deployment - * `SuppressHeadCheck` - Disable the pre-sign object validation. Enable for speedier access. + * `SuppressHeadCheck` - Disable the pre-sign object validation. Enable for speedier access. * **Session Settings**: - * `SessionTTL` - How long, in seconds, JWT Cookies are valid for + * `SessionTTL` - How long, in seconds, JWT Cookies are valid for * `JwtAlgo` - JWT Signing algorithm * `JwtKeySecretName` - [JWT Cookie Secret](#jwt-cookie-secret) -* **NGAP Integration**: ⚠️ These are **REQUIRED** for EDC Deployments. See +* **NGAP Integration**: ⚠️ These are **REQUIRED** for EDC Deployments. See [VPC and Networking](#vpc-and-networking) * `PrivateVPC` - Private VPC ID in which we create resources (`$VPCID`) * `VPCSecurityGroupIDs` - Security group (`$SUBNETID`) * `VPCSubnetIDs` - VPC Subnets to deploy into (`$SECURITYGROUP`) * `PermissionsBoundaryName` - Permissions Boundary used for creating IAM Roles; probably `NGAPShRoleBoundary` - + #### AWS Console -Originally, I was going to walk through deploying TEA via the AWS -CloudFormation web console, but there are -[better guides](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/cfn-console-create-stack.html) +Originally, I was going to walk through deploying TEA via the AWS +CloudFormation web console, but there are +[better guides](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/cfn-console-create-stack.html) out there. 
So, instead, I'd just remind you that you need [THREE files](#getting-tea-code): * This file will be uploaded to the CloudFormation web console: - * **tea-cloudformation-build.#.yaml** + * **tea-cloudformation-build.#.yaml** * These two zips need to be uploaded to a bucket in your account: * **tea-code-build.#.zip** - * **tea-dependencylayer-build.#.zip** + * **tea-dependencylayer-build.#.zip** -#### Using awscli +#### Using awscli As with [AWS Console](#aws-console) deployments, you'll need to upload the two zip files to a code bucket in your account. After doing that, be sure you've sourced the [VPC and Networking](#vpc-and-networking) parameters as well as create -the two required [Secrets](#secrets-setup). +the two required [Secrets](#secrets-setup). ```bash # Code path variables @@ -648,7 +688,7 @@ BUCKETMAP_FILENAME=my_bucketmap.yaml CFG_BUCKETNAME=my-tea-cfg # See the Cloudformation parameters section above for a description of these params. -STACK_NAME=my-tea # needs to be compatible with S3 naming requirements (lower case, no underscores, etc) +STACK_NAME=my-tea # needs to be compatible with S3 naming requirements (lower case, no underscores, etc) # because the CF template may create buckets using this name. AWS_REGION=us-west-2 # Or another region if desired. AWS_PROFILE=default # Or whichever awscli profile you want to deploy to. @@ -685,16 +725,30 @@ aws cloudformation deploy --profile=${AWS_PROFILE} --region=${AWS_REGION} \ ``` ### From Terraform -Why would you want to deploy it from Terraform? Just use +Why would you want to deploy it from Terraform? Just use [CloudFormation](#from-cloudformation)! But for real, if someone wants to do a write up for this, I'd love to include it. ### From Cumulus -Please see the -[Cumulus documentation](https://nasa.github.io/cumulus/docs/deployment/thin_egress_app) +Please see the +[Cumulus documentation](https://nasa.github.io/cumulus/docs/deployment/thin_egress_app) for current TEA deployment and configuration procedures. +### For development +You can deploy the CloudFormation template for development using our Makefile. You'll still need to +set up your secrets and S3 buckets manually, but once that's done pushing your changes is as easy as +running `make deploy`. + +```bash +# First time setup. Add your bucket and secret names to this file. +make Makefile.config + +# Build and deploy to the configured AWS profile. +# Run this whenever you're ready to test your code changes. +make deploy +``` + [⬆ Return to Table of Contents](#table-of-contents) ## Post deployment procedures @@ -730,35 +784,35 @@ curl ${api_endpoint}version #### Accessing the API Gateway inside EDC -It's important to understand that in the EDC, you can't do the above step. -There is no route into the VPC from the general internet. There are -write-ups elsewhere that go into depth on how to proxy HTTPS requests into -the private VPC. +It's important to understand that in the EDC, you can't do the above step. +There is no route into the VPC from the general internet. There are +write-ups elsewhere that go into depth on how to proxy HTTPS requests into +the private VPC. ### Requesting an EDC Public App -To access TEA from outside the private VPC, you'll need to make a [Publish +To access TEA from outside the private VPC, you'll need to make a [Publish New App](https://bugs.earthdata.nasa.gov/servicedesk/customer/portal/7/create/94) -NASD request. +NASD request. 
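The next paragraph describes which stack parameters change once the new domain is issued; as a concrete sketch of that update (hedged: the template path and `<cloudfront-domain>` are placeholders, and you should keep all of the other `--parameter-overrides` from your original deploy), you can reuse the `aws cloudformation deploy` pattern from the awscli section:

```bash
# Re-run the original deploy, changing only the domain-related overrides.
# <cloudfront-domain> is the domain returned by the Publish New App (NASD) process.
aws cloudformation deploy --profile=${AWS_PROFILE} --region=${AWS_REGION} \
    --stack-name ${STACK_NAME} \
    --template-file ./tea-cloudformation-build.#.yaml \
    --capabilities CAPABILITY_NAMED_IAM \
    --parameter-overrides \
        "DomainName=<cloudfront-domain>" \
        "CookieDomain=<cloudfront-domain>"  # ...plus your existing overrides
```

Note that the CloudFormation parameter controlling the cookie setting is `CookieDomain` (see [All Parameters](#all-parameters)).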
After the NASD has been processed and a new CloudFront domain is generated for your TEA deployment, you'll need to update the `DomainName` and `CookieName` -parameters of your [TEA Deployment](#all-parameters) with the new CloudFront +parameters of your [TEA Deployment](#all-parameters) with the new CloudFront domain name. ### Updating configuration of a live TEA -If the bucket map is updated, because of caching it can take quite some time before TEA loads it. +If the bucket map is updated, because of caching it can take quite some time before TEA loads it. If you would like to force TEA to load the new configs immediately, invoke the bumper lambda: ```bash -aws $AWSENV lambda invoke --function-name="${STACK_NAME}-BumperLambda" output.txt +aws $AWSENV lambda invoke --function-name="${STACK_NAME}-BumperLambda" output.txt ``` ### Using custom domain names There is a process by which you can request an Alternate Domain name to your CloudFront endpoint. If you do this, you'll need to update the `DomainName`, -`CookieName`, and `DomainCertArn` parameters of your +`CookieName`, and `DomainCertArn` parameters of your [TEA Deploment](#all-parameters). [⬆ Return to Table of Contents](#table-of-contents) @@ -773,61 +827,61 @@ TEA Creates two log streams: * **`/aws/lambda/-EgressLambda`** - This is where App logs go * **`/aws/lambda/-UpdatePolicyLambda`** - Logs from the Lambda that -keeps the in-region CIDR list up to date in the in-region download role. +keeps the in-region CIDR list up to date in the in-region download role. ### Values embedded into S3 logs -When TEA generates a pre-signed S3 download URL, it adds a query parameter +When TEA generates a pre-signed S3 download URL, it adds a query parameter `A-userid`, the value of which is the EDL User ID, if any, that was used to -download the data. This parameter is available to be mined out of the S3 +download the data. This parameter is available to be mined out of the S3 access logs. -When CloudFront is deployed in front of TEA, that `A-userid` parameters is +When CloudFront is deployed in front of TEA, that `A-userid` parameters is also added to the s3 download request that CloudFront generates. CloudFront will also add a `sourceip` parameter that holds the true IP of the external -user. +user. [⬆ Return to Table of Contents](#table-of-contents) ## When things go wrong -There is a lot that _can_ go wrong, but we hope that if you followed this -guide, you'll have prevented many of the common problems. If not, start +There is a lot that _can_ go wrong, but we hope that if you followed this +guide, you'll have prevented many of the common problems. If not, start here! ### Troubleshooting -#### Error message: -If you see an error message in the Cloudformation Events like this: +#### Error message: +If you see an error message in the Cloudformation Events like this: > CloudWatch Logs role ARN must be set in account settings to enable logging (Service: AmazonApiGateway; Status Code: 400; Error Code: BadRequestException; ... #### Solution: `EnableApiGatewayLogToCloudWatch` is set to `True`. If you don't need API Gateway logging to cloudwatch, set to `False`. If you do, you must create a role with write access to Cloudwatch Logs and add its ARN here: `https://console.aws.amazon.com/apigateway/home?region=#/settings`. - + #### Updating Cached Values: -For efficiency, TEA will cache configuration into the Lambda run environment. -Certain changes, like modifications to the bucket map or secrets, may not be -immediately picked up. 
Lambda run times will eventually time out, but you can -force refetching of cached values be simply adding a dummy environment -variable to the running Lambda environment. There may be a better way to -trigger Lambda environment flushing, lets us know if you find a way. +For efficiency, TEA will cache configuration into the Lambda run environment. +Certain changes, like modifications to the bucket map or secrets, may not be +immediately picked up. Lambda run times will eventually time out, but you can +force refetching of cached values be simply adding a dummy environment +variable to the running Lambda environment. There may be a better way to +trigger Lambda environment flushing, lets us know if you find a way. ### Bug reporting/tracking -First step when you encounter an error is to check the -[Troubleshooting](#troubleshooting) section. If your problem is not there, -feel free to reach out in the `#tea-pot` Slack Channel. If your problem -can't be resolved, we'll put in a +First step when you encounter an error is to check the +[Troubleshooting](#troubleshooting) section. If your problem is not there, +feel free to reach out in the `#tea-pot` Slack Channel. If your problem +can't be resolved, we'll put in a [Github Issue](https://github.com/asfadmin/thin-egress-app/issues) to help -track the problem and seek resolution. +track the problem and seek resolution. ### Maintenance cycle & Releases -We have no concrete cycle. If nothing else, we'll do periodic releases to -help resolve CVE issues. +We have no concrete cycle. If nothing else, we'll do periodic releases to +help resolve CVE issues. [⬆ Return to Table of Contents](#table-of-contents) @@ -845,8 +899,8 @@ accepting contributions: ### Requesting features -If you have a feature you'd like to request, first confirm the feature -aligns with our [TEA Vision](#tea-vision). If you think the feature +If you have a feature you'd like to request, first confirm the feature +aligns with our [TEA Vision](#tea-vision). If you think the feature belongs in TEA, you have TWO options: * Request the feature using [Github Issue](https://github.com/asfadmin/thin-egress-app/issues) @@ -857,23 +911,23 @@ belongs in TEA, you have TWO options: ## TL;DR There are LOTS of words in this document. Lots of steps. Lots of ways -things can go wrong. It's understandable that you might not be +things can go wrong. It's understandable that you might not be interested reading all the words and performing all the steps. In that case, try our magic bootstrapping script! ### Walk me through this! -You can either download [`deploy.sh`](https://github.com/asfadmin/thin-egress-app/blob/devel/build/deploy.sh) -and run it, or, as show below, curl the output directly into bash. +You can either download [`deploy.sh`](https://github.com/asfadmin/thin-egress-app/blob/devel/build/deploy.sh) +and run it, or, as show below, curl the output directly into bash. 
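If you would rather take the first option and download `deploy.sh` so you can look it over before running it, here is a minimal sketch (the parameters are the same ones documented for the curl-into-bash variant below; the values are placeholders):

```bash
# Fetch the script so it can be reviewed before it runs
curl -s -o deploy.sh \
    'https://raw.githubusercontent.com/asfadmin/thin-egress-app/devel/build/deploy.sh'
less deploy.sh

# Then run it, passing the same parameters shown in the curl example below
bash deploy.sh --maturity=<MATURITY> --client-id=<CLIENT-ID>  # ...and the rest
```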
-This bash script requires `curl`, -[`awscli`](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-install.html), -[`Session Manager Plugin`](https://docs.aws.amazon.com/systems-manager/latest/userguide/session-manager-working-with-install-plugin.html), -and [`jq`](https://github.com/stedolan/jq/wiki/Installation) to be +This bash script requires `curl`, +[`awscli`](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-install.html), +[`Session Manager Plugin`](https://docs.aws.amazon.com/systems-manager/latest/userguide/session-manager-working-with-install-plugin.html), +and [`jq`](https://github.com/stedolan/jq/wiki/Installation) to be installed. -Run `deploy.sh` by replacing all `` with the appropriate -values. `STACK-NAME` should be less than 20 characters, lower-case +Run `deploy.sh` by replacing all `` with the appropriate +values. `STACK-NAME` should be less than 20 characters, lower-case letters, numbers, or `-`. It should also be globally unique. ```bash @@ -886,7 +940,7 @@ curl -s 'https://raw.githubusercontent.com/asfadmin/thin-egress-app/devel/build/ --client-id= \ --pass='' \ --maturity= \ - --edl-user-creds=':' + --edl-user-creds=':' ``` All parameters are optional, but not supplying params affects behavior. diff --git a/build/Jenkinsfile b/build/Jenkinsfile index ecc2ad49..361e3dca 100644 --- a/build/Jenkinsfile +++ b/build/Jenkinsfile @@ -62,11 +62,11 @@ pipeline { STACKNAME_SAME="${APPTAG}-jenk-same" STACKNAME_SCND="${APPTAG}-jenk-scnd" URS_CREDS_SECRET_NAME="${URS_CREDS_SECRET_NAME}" - DEPENDENCYLAYERFILENAME="${env.APPTAG}-dependencylayer-${env.NEWTAG}.zip" + DEPENDENCY_LAYER_FILENAME="${env.APPTAG}-dependencylayer-${env.NEWTAG}.zip" DOCKERREPO="docker-registry.asf.alaska.edu:5000" JWTKEYSECRETNAME="${JWTKEYSECRETNAME}" JWTALGO="RS256" - ZIPFILENAME="${DEPENDENCYLAYERFILENAME}" + ZIPFILENAME="${DEPENDENCY_LAYER_FILENAME}" LAMBDA_TIMEOUT=6 LAMBDA_MEMORY=128 AWS_BIN="/usr/local/bin/aws" @@ -77,10 +77,11 @@ pipeline { } // Build on a slave with docker on kubernetes - agent { kubernetes { - yamlFile 'build/k8s.yaml' - } - } + agent { + kubernetes { + yamlFile 'build/k8s.yaml' + } + } stages { // just sort of assess the environment @@ -89,18 +90,18 @@ pipeline { container('tea-container') { // Send chat notification - mattermostSend channel: "${CHAT_ROOM}", color: '#EAEA5C', endpoint: "${env.CHATSERVER_SECRET_URL}", message: "Build started: ${env.JOB_NAME} ${env.BUILD_NUMBER}, branch: ${GIT_BRANCH} (<${env.BUILD_URL}|Open>). See (<{$env.RUN_CHANGES_DISPLAY_URL}|Changes>)." + mattermostSend( + channel: "${CHAT_ROOM}", + color: '#EAEA5C', + endpoint: "${env.CHATSERVER_SECRET_URL}", + message: "Build started: ${env.JOB_NAME} ${env.BUILD_NUMBER}, branch: ${GIT_BRANCH} (<${env.BUILD_URL}|Open>). See (<{$env.RUN_CHANGES_DISPLAY_URL}|Changes>)." + ) withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "${env.AWSCREDS}"]]) { // Dump ENV sh 'tree' sh 'pwd' sh 'env' } - // Update submodules. 
- sh "echo '>> Updating submodules....'" - sh "git config user.email 'jenkins@jenkins.asf.alaska.edu'" - sh "git config user.name 'jenkins'" - sh "git submodule update --init --recursive" // show us the params: echo "params.APPTAG: ${params.APPTAG}" @@ -121,58 +122,44 @@ pipeline { echo "params.AWSCREDS_TEA_DEPLOY: ${params.AWSCREDS_TEA_DEPLOY}" echo "params.COOKIE_DOMAIN: ${params.COOKIE_DOMAIN}" + // Clear out Makefile config + sh 'echo "" > Makefile.config' } } } - - // Download and package dependencies stage('Package Dependency Layer') { steps { container('layer-builder') { - - sh '${WORKSPACE}/build/dependency_builder.sh && echo "built to $WORKSPACE"' + sh "echo '>> Building dependency layer....'" + // We're already running in the right container so tell make to run without docker + sh "make dependencies DOCKER_COMMAND=" + sh "cp ${WORKSPACE}/dist/thin-egress-app-dependencies.zip ${WORKSPACE}/${DEPENDENCY_LAYER_FILENAME}" sh 'echo "finished depbuilder, now looking in some dirs" && cd "$WORKSPACE" && ls -lah' - // zip file now located at $WORKSPACE/$DEPENDENCYLAYERFILENAME } } - } + } // Package code stage('Package Code') { steps { container('tea-container') { - // Zip up the upload. - sh "echo '>> Building Zipfile....'" - - sh """ cd ${WORKSPACE}/lambda && \ - sed -i -e "s//${BUILDTAG}/" ./app.py && \ - zip -g ../${CODE_ARCHIVE_FILENAME} ./app.py && \ - sed -i -e "s//${BUILDTAG}/" ./tea_bumper.py && \ - zip -g ../${CODE_ARCHIVE_FILENAME} ./tea_bumper.py && \ - zip -g ../${CODE_ARCHIVE_FILENAME} ./update_lambda.py && \ - zip -g -r ../${CODE_ARCHIVE_FILENAME} ./templates && \ - cd .. && \ - cd rain-api-core && \ - zip -g -r ../${CODE_ARCHIVE_FILENAME} ./rain_api_core """ - - // Swap in BUILD# - sh "echo '>> Build Stamping....'" - - sh "cd ${WORKSPACE}" - - // thin-egress-app.yaml - sh 'if [ ${APPTAG} != "tea" ]; then sed -i -e "s/asf.public.code/${CODE_BUCKET}/" ./cloudformation/thin-egress-app.yaml; fi' - sh 'sed -i -e "s//${CODE_DIR}\\/${CODE_ARCHIVE_FILENAME}/" ./cloudformation/thin-egress-app.yaml' - sh 'sed -i -e "s//${CODE_DIR}\\/${DEPENDENCYLAYERFILENAME}/" ./cloudformation/thin-egress-app.yaml' - sh 'sed -i -e "s//${BUILD_ID}/g" ./cloudformation/thin-egress-app.yaml' - sh 'sed -i -e "s;^Description:.*;Description: \\"TEA built by Jenkins job ${JOB_NAME}, ${BUILDTAG}\\";" ./cloudformation/thin-egress-app.yaml' + sh "echo '>> Building lambda code....'" + sh "make code BUILD_ID=${BUILDTAG}" + sh "cp ${WORKSPACE}/dist/thin-egress-app-code.zip ${WORKSPACE}/${CODE_ARCHIVE_FILENAME}" + + sh "echo '>> Building CloudFormation yaml....'" + sh """ make yaml \ + CF_DEFAULT_CODE_BUCKET=${CODE_BUCKET} \ + CF_DEFAULT_DEPENDENCY_ARCHIVE_KEY=${CODE_DIR}/${DEPENDENCY_LAYER_FILENAME} \ + CF_DEFAULT_CODE_ARCHIVE_KEY=${CODE_DIR}/${CODE_ARCHIVE_FILENAME} \ + CF_BUILD_VERSION=${BUILD_ID} \ + CF_DESCRIPTION="TEA built by Jenkins job ${JOB_NAME}, ${BUILDTAG}" """ + sh "cp ${WORKSPACE}/dist/thin-egress-app.yaml ${WORKSPACE}/cloudformation/thin-egress-app.yaml" // Zip up terraform sh "echo '>> Building Terraform Zip....'" - sh "cp ${WORKSPACE}/${CODE_ARCHIVE_FILENAME} ${WORKSPACE}/terraform/lambda.zip" - sh "cp ${WORKSPACE}/${DEPENDENCYLAYERFILENAME} ${WORKSPACE}/terraform/dependencylayer.zip" - sh "cp ./cloudformation/thin-egress-app.yaml ${WORKSPACE}/terraform/thin-egress-app.yaml" - sh "cd ${WORKSPACE}/terraform && zip ../${TF_ZIP_FILENAME} *.tf thin-egress-app.yaml lambda.zip dependencylayer.zip" + sh "make terraform" + sh "cp ${WORKSPACE}/dist/thin-egress-app-terraform.zip 
${WORKSPACE}/${TF_ZIP_FILENAME}" } } } @@ -185,7 +172,7 @@ pipeline { // Push the code up to private bucket sh "echo '>> Pushing dependencylayer to ASF Bucket'" - sh "${AWS_BIN} s3 cp ${WORKSPACE}/${DEPENDENCYLAYERFILENAME} s3://${CODE_BUCKET}${CODE_BUCKET_SUFFIX}/${CODE_DIR}/${DEPENDENCYLAYERFILENAME}" + sh "${AWS_BIN} s3 cp ${WORKSPACE}/${DEPENDENCY_LAYER_FILENAME} s3://${CODE_BUCKET}${CODE_BUCKET_SUFFIX}/${CODE_DIR}/${DEPENDENCY_LAYER_FILENAME}" sh "echo '>> Pushing code to ASF Bucket'" sh "${AWS_BIN} s3 cp ./${CODE_ARCHIVE_FILENAME} s3://${CODE_BUCKET}${CODE_BUCKET_SUFFIX}/${CODE_DIR}/" sh "${AWS_BIN} s3 cp ./cloudformation/thin-egress-app.yaml s3://${CODE_BUCKET}${CODE_BUCKET_SUFFIX}/${CODE_DIR}/${CF_TEMPLATE_FILENAME}" @@ -225,7 +212,7 @@ pipeline { DomainCertArn=${DOMAIN_CERT_ARN-""} \ CookieDomain=${COOKIE_DOMAIN-""} \ LambdaCodeS3Key=${CODE_DIR}/${CODE_ARCHIVE_FILENAME} \ - LambdaCodeDependencyArchive=${CODE_DIR}/${DEPENDENCYLAYERFILENAME} \ + LambdaCodeDependencyArchive=${CODE_DIR}/${DEPENDENCY_LAYER_FILENAME} \ LambdaCodeS3Bucket=${CODE_BUCKET}${CODE_BUCKET_SUFFIX} \ LambdaTimeout=${LAMBDA_TIMEOUT} \ LambdaMemory=${LAMBDA_MEMORY} \ @@ -263,7 +250,7 @@ pipeline { DomainCertArn=${DOMAIN_CERT_ARN} \ CookieDomain=${COOKIE_DOMAIN} \ LambdaCodeS3Key=${CODE_DIR}/${CODE_ARCHIVE_FILENAME} \ - LambdaCodeDependencyArchive=${CODE_DIR}/${DEPENDENCYLAYERFILENAME} \ + LambdaCodeDependencyArchive=${CODE_DIR}/${DEPENDENCY_LAYER_FILENAME} \ LambdaCodeS3Bucket=${CODE_BUCKET}${CODE_BUCKET_SUFFIX} \ LambdaTimeout=${LAMBDA_TIMEOUT} \ LambdaMemory=${LAMBDA_MEMORY} \ @@ -283,8 +270,8 @@ pipeline { // Load up AWS + URS credentials withCredentials([[$class: 'UsernamePasswordMultiBinding', credentialsId: "${env.URS_ACCT}", usernameVariable: 'URS_USERNAME', passwordVariable: 'URS_PASSWORD']]) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "${env.AWSCREDS}"]]) { - // Attempt some downloads - sh("python3 build/test/download_test.py") + // Run end to end tests + sh("pytest tests_e2e --stack-name=${STACKNAME_SAME} --test-results=asf.public.code/thin-egress-app/testresults.json --log-cli-level=DEBUG") } } } @@ -324,7 +311,7 @@ pipeline { sh "echo '>> Pushing code to Public Code Bucket'" sh "${AWS_BIN} s3 cp ./${CODE_ARCHIVE_FILENAME} s3://${CODE_PUBLIC_BUCKETNAME}/${CODE_DIR}/ --acl public-read" sh "${AWS_BIN} s3 cp ./${TF_ZIP_FILENAME} s3://${CODE_PUBLIC_BUCKETNAME}/${CODE_DIR}/ --acl public-read" - sh "${AWS_BIN} s3 cp ${WORKSPACE}/${DEPENDENCYLAYERFILENAME} s3://${CODE_PUBLIC_BUCKETNAME}/${CODE_DIR}/${DEPENDENCYLAYERFILENAME} --acl public-read" + sh "${AWS_BIN} s3 cp ${WORKSPACE}/${DEPENDENCY_LAYER_FILENAME} s3://${CODE_PUBLIC_BUCKETNAME}/${CODE_DIR}/${DEPENDENCY_LAYER_FILENAME} --acl public-read" sh "${AWS_BIN} s3 cp ./cloudformation/thin-egress-app.yaml s3://${CODE_PUBLIC_BUCKETNAME}/${CODE_DIR}/${CF_TEMPLATE_FILENAME} --acl public-read" sh """ echo '{ "schemaVersion": 1, "label": "Last Release", "message": "'$BUILDTAG'", "color": "success" }' > /tmp/lastrelease.json""" sh """ ${AWS_BIN} s3 cp --metadata-directive REPLACE --cache-control no-cache \ diff --git a/build/dependency_builder.sh b/build/dependency_builder.sh index c6f935a0..18afb4ef 100755 --- a/build/dependency_builder.sh +++ b/build/dependency_builder.sh @@ -1,52 +1,30 @@ #!/usr/bin/env bash -# required env vars: -# WORKSPACE - will contain the thin-egress-app project -# DEPENDENCYLAYERFILENAME - this script will output a zip file with this name +WORKSPACE=$(pwd) +OUT_FILE=$1 +BUILD_DIR="$WORKSPACE/$2" -echo 
"RUNNING dependency_builder.sh" +mkdir -p $BUILD_DIR/python +cd $BUILD_DIR/python || exit -echo "inside dependency building container env:" -printenv - -yum install -y amazon-linux-extras && \ -amazon-linux-extras enable python3.8 - -yum install -y zip python38 python38-pip -python3.8 -m pip install --upgrade pip -yum clean all - -mkdir -p /tmp/pkg/python - - -cd /tmp/pkg/python || exit - -echo "Updating Pip..." -python3.8 -m pip install -U pip -echo "Installing setuptools..." -python3.8 -m pip install --upgrade setuptools -echo "Installing ${WORKSPACE}/rain-api-core/requirements.txt" -python3.8 -m pip install -r "${WORKSPACE}"/rain-api-core/requirements.txt --target . -echo "Installing ${WORKSPACE}/lambda/requirements.txt" -python3.8 -m pip install -r "${WORKSPACE}"/lambda/requirements.txt --target . +echo "Installing ${WORKSPACE}/requirements.txt" +python3.8 -m pip install \ + --upgrade \ + -r "${WORKSPACE}/requirements.txt" \ + --target $BUILD_DIR/python \ + --cache-dir $BUILD_DIR/.pip-cache/ # get rid of unneeded things to make code zip smaller rm -rf ./*.dist-info -# rm -rf pip # commented out because https://snyk.io/vuln/SNYK-PYTHON-PIP-609855 rm -rf docutils -rm -rf chalice/cli # cli in lambda? No way! +rm -rf click chalice/cli # cli in lambda? No way! rm -rf botocore # included with lambda, just takes up space here -rm -rf setuptools +rm -rf pip setuptools wheel easy_install.py rm -rf tests -rm -rf easy_install.py -rm -f typing.py # MUST be removed, its presence causes error every time - -cd .. -# now in pkg/ -echo "zipping dependencies to ${WORKSPACE}/${DEPENDENCYLAYERFILENAME}." -ls -lah +cd $BUILD_DIR +echo "Zipping dependencies to ${WORKSPACE}/${OUT_FILE}" -zip -r9 "${WORKSPACE}/${DEPENDENCYLAYERFILENAME}" . +zip -r9 -q "${WORKSPACE}/${OUT_FILE}" python -echo "all done making dependency layer" \ No newline at end of file +echo "Done making dependency layer" diff --git a/build/dependency_layer_builder.Dockerfile b/build/dependency_layer_builder.Dockerfile deleted file mode 100644 index c998b3b9..00000000 --- a/build/dependency_layer_builder.Dockerfile +++ /dev/null @@ -1,18 +0,0 @@ -FROM amazonlinux:2 - - -ENV ZIPFILENAME=asf-ursdependencylayer.zip - -RUN yum update -y && \ - yum install -y amazon-linux-extras && \ - amazon-linux-extras enable python3.8 && \ - yum install -y zip python38 python38-pip && \ - rm -rf /var/cache/yum && yum clean all && mkdir -p /depbuild/pkg && mkdir -p /depbuild/out && mkdir /depbuild/in - -ADD dependency_builder.sh /depbuild/ -WORKDIR /depbuild - -# When running, you must specify `-v /local/dir/you/want/zip/to/appear/in:/depbuild/out -v /local/dir/where/requirements.txt/lives:/depbuild/in` -# This will output the zipfile at /depbuild/out - -CMD ["./dependency_builder.sh"] \ No newline at end of file diff --git a/build/i_and_a_builder_agent.Dockerfile b/build/i_and_a_builder_agent.Dockerfile index 83e2de97..cb7bada1 100644 --- a/build/i_and_a_builder_agent.Dockerfile +++ b/build/i_and_a_builder_agent.Dockerfile @@ -1,9 +1,10 @@ FROM ubuntu RUN apt-get update && \ - apt-get -y install curl python3 python3-pip git vim tree zip && \ - pip3 install -U pip && \ - pip3 install awscli boto3 requests + apt-get -y install curl python3 python3-pip git vim tree zip make + +RUN pip3 install -U pip +RUN pip3 install awscli boto3 requests pytest RUN apt-get clean && apt-get install -y apt-transport-https gnupg2 && \ curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ @@ -20,5 +21,3 @@ RUN apt-get clean && apt-get install -y 
apt-transport-https gnupg2 && \ # docker push ${registry}/${apptag} CMD ["tail", "-f", "/dev/null"] - - diff --git a/build/k8s.yaml b/build/k8s.yaml index 8ecc818a..582341f5 100644 --- a/build/k8s.yaml +++ b/build/k8s.yaml @@ -13,19 +13,8 @@ spec: - "-f" - "/dev/null" - name: layer-builder - image: amazonlinux:2 + image: lambci/lambda:build-python3.8 command: - - "tail" - - "-f" - - "/dev/null" - volumeMounts: - - mountPath: "/depbuild/in" - name: depbuild-in - - mountPath: "/depbuild/out" - name: depbuild-out - readOnly: false - volumes: - - name: depbuild-in - emptyDir: {/dependency_builder.sh} - - name: depbuild-out - emptyDir: {/dependency_builder.sh} \ No newline at end of file + - "tail" + - "-f" + - "/dev/null" diff --git a/build/test/download_test.py b/build/test/download_test.py deleted file mode 100644 index f889c70f..00000000 --- a/build/test/download_test.py +++ /dev/null @@ -1,501 +0,0 @@ -import argparse -import sys -import unittest -import os -import boto3 -import requests -from requests.auth import HTTPBasicAuth -import logging -import json -import base64 -from datetime import datetime -from uuid import uuid1 -import time - -logging.getLogger('boto3').setLevel(logging.ERROR) -logging.getLogger('botocore').setLevel(logging.ERROR) -logging.getLogger('boto3').setLevel(logging.ERROR) -logging.getLogger('botocore').setLevel(logging.ERROR) -logging.getLogger('nose').setLevel(logging.ERROR) -logging.getLogger('elasticsearch').setLevel(logging.ERROR) -logging.getLogger('s3transfer').setLevel(logging.ERROR) -logging.getLogger('urllib3').setLevel(logging.ERROR) -logging.getLogger('connectionpool').setLevel(logging.ERROR) - -logging.basicConfig(format='%(asctime)s [L%(lineno)s - %(funcName)s()]: %(message)s', level=logging.DEBUG) -log = logging.getLogger(__name__) - -default_stackname = "teatest-jenk-same" -default_region = "us-west-2" -# Set environment variables -STACKNAME = os.getenv("STACKNAME_SAME", default_stackname) -AWS_DEFAULT_REGION = os.getenv("AWS_DEFAULT_REGION", default_region) -aws_access_key_id = os.getenv("AWS_ACCESS_KEY_ID") -aws_secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY") - -# Connect to AWS -client = boto3.client('apigateway', region_name=AWS_DEFAULT_REGION, aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key) - -# Get EgressGateway Rest API ID from AWS and calculate APIROOT -rest_apis = client.get_rest_apis() -API_ID = None -for api in rest_apis['items']: - if api['name'] == f"{STACKNAME}-EgressGateway": - API_ID = api['id'] - -if not API_ID: - log.info(f"Could not find API for the given stackname {STACKNAME}") - exit() - -APIHOST = f"{API_ID}.execute-api.{AWS_DEFAULT_REGION}.amazonaws.com" -APIROOT = f"https://{APIHOST}/API" - -# Important Objects and strings we'll need for our tests -METADATA_FILE = 'SA/METADATA_GRD_HS/S1A_EW_GRDM_1SDH_20190206T190846_20190206T190951_025813_02DF0B_781A.iso.xml' -METADATA_FILE_CH = 'SA/METADATA_GRD_HS_CH/S1A_EW_GRDM_1SDH_20190206T190846_20190206T190951_025813_02DF0B_781A.iso.xml' -METADATA_CHECK = 'S1A_EW_GRDM_1SDH_20190206T190846_20190206T190951_025813_02DF0B_781A.iso.xml' -BROWSE_FILE = 'SA/BROWSE/S1A_EW_GRDM_1SDH_20190206T190846_20190206T190951_025813_02DF0B_781A.jpg' -OBJ_PREFIX_FILE = 'SA/METADATA_GRD_HS_CH/browse/ALAV2A104483200-OORIRFU_000.png' -MAP_PATHS = sorted(["SA/OCN", "SA/OCN_CH", "SB/OCN", "SB/OCN_CH"]) - -# Configuration: -default_test_result_bucket = "asf.public.code" -default_test_result_object = "thin-egress-app/testresults.json" -default_locate_bucket = 
"s1-ocn-1e29d408" -TEST_RESULT_BUCKET = os.getenv("TEST_RESULT_BUCKET", default_test_result_bucket) -TEST_RESULT_OBJECT = os.getenv("TEST_RESULT_OBJECT", default_test_result_object) -LOCATE_BUCKET = os.getenv("LOCATE_BUCKET", default_locate_bucket) - -# Global variable we'll use for our tests -cookiejar = [] -urs_username = os.getenv("URS_USERNAME") -urs_password = os.getenv("URS_PASSWORD") - - -def env_var_check(): - is_set = True - if STACKNAME == default_stackname: - log.info(f"The environment STACKNAME is set to the default value: {default_stackname}") - if AWS_DEFAULT_REGION == default_region: - log.info(f"The environment AWS_DEFAULT_REGION is set to the default value: {default_region}") - if aws_access_key_id is None: - log.info("The environment variable AWS_ACCESS_KEY_ID is not set") - is_set = False - if aws_secret_access_key is None: - log.info("The environment variable AWS_SECRET_ACCESS_KEY is not set") - is_set = False - if TEST_RESULT_BUCKET == default_test_result_bucket: - log.info(f"The environment TEST_RESULT_BUCKET is set to the default value: {default_test_result_bucket}") - if TEST_RESULT_OBJECT == default_test_result_object: - log.info(f"The environment TEST_RESULT_OBJECT is set to the default value: {default_test_result_object}") - if LOCATE_BUCKET == default_locate_bucket: - log.info(f"The environment LOCATE_BUCKET is set to the default value: {default_locate_bucket}") - if urs_username is None: - log.info("The environment variable URS_USERNAME is not set") - is_set = False - if urs_username is None: - log.info("The environment variable URS_PASSWORD is not set") - is_set = False - return is_set - - -class unauthed_download_test(unittest.TestCase): - # Check that public files are returned without auth - def test_check_that_images_are_public(self): - url = f'{APIROOT}/{BROWSE_FILE}' - r = requests.get(url) - - log.info(f'Public Image Test {url} Return Code: {r.status_code} (Expect 200)') - self.assertTrue(r.status_code == 200) - - if 'Content-Type' in r.headers: - log.info(f"Public Image Test Content-Type: {r.headers['Content-Type']} (Expect 'image/jpeg')") - else: - log.warning(f"Public Image Test Failed to get Content-Type. Headers: {r.headers}") - self.assertTrue('Content-Type' in r.headers and r.headers['Content-Type'] == 'image/jpeg') - - def test_check_public_obj_prefix(self): - url = f'{APIROOT}/{OBJ_PREFIX_FILE}' - r = requests.get(url) - - log.info(f'Public prefix in restricted bucket {url} Return Code: {r.status_code} (Expect 200)') - self.assertTrue(r.status_code == 200) - - def test_404_on_bad_request(self): - url = f"{APIROOT}/bad/url.ext" - r = requests.get(url) - - log.info(f"Checking that a non-existent file ({url}) returns a 404: r.status_code (Expect 404)") - self.assertTrue(r.status_code == 404) - - def test_bad_cookie_value_cause_URS_redirect(self): - url = f"{APIROOT}/{METADATA_FILE}" - cookies = {'urs_user_id': "badusername", 'urs_access_token': "blah"} - - log.info(f"Attempting to use bad cookies ({cookies}) to access {url}") - r = requests.get(url, allow_redirects=False) - - log.info(f"Bad cookies should result in a redirect to EDL. 
r.is_redirect: {r.is_redirect} (Expect True)") - self.assertTrue(r.is_redirect) - - log.info(f"Result r.headers['Location']: {r.headers['Location']}") - self.assertTrue(r.headers['Location'] is not None) - - log.info(f"Make sure 'Location' header is redirecting to URS") - self.assertTrue('oauth/authorize' in r.headers['Location']) - - -class auth_download_test(unittest.TestCase): - def test_auth_process_is_successful(self): - url = f"{APIROOT}/{METADATA_FILE}" - global cookiejar - - log.info(f"Hitting {url} to get redirect to URS for auth") - session = requests.session() - request = session.get(url) - url_earthdata = request.url - - secret_password = urs_password[0] + "*" * (len(urs_password) - 2) + urs_password[-1] - log.info(f"Following URS Redirect to {url_earthdata} with Basic auth ({urs_username}/{secret_password}) to generate an access cookie") - login2 = session.get(url_earthdata, auth=HTTPBasicAuth(urs_username, urs_password)) - - log.info(f"Login attempt results in status_code: {login2.status_code}") - cookiejar = session.cookies - - # Copy .asf.alaska.edu cookies to match API Address - for z in cookiejar: - if "asf.alaska.edu" in z.domain: - logging.info(f"Copying cookie {z.name} from {z.domain} => {APIHOST}") - cookiejar.set_cookie(requests.cookies.create_cookie(domain=APIHOST, name=z.name, value=z.value)) - - log.info(f"Generated cookies: {cookiejar}") - final_request = session.get(url, cookies=cookiejar) - - log.info(f"Final request returned: {final_request.status_code} (Expect 200)") - self.assertTrue(final_request.status_code == 200) - - -class authed_download_test(unittest.TestCase): - def test_urs_auth_redirect_for_auth_downloads(self): - url = f"{APIROOT}/{METADATA_FILE}" - global cookiejar - - log.info(f"Hitting {url} with cookies to check for redirect") - r = requests.get(url, cookies=cookiejar, allow_redirects=False) - - log.info(f"Result r.status_code: {r.status_code} (Expect 303)") - self.assertTrue(r.status_code == 303) - - log.info(f"Result r.is_redirect: {r.is_redirect} (Expect True)") - self.assertTrue(r.is_redirect) - - log.info(f"Result r.headers['Location']: {r.headers['Location']}") - self.assertTrue(r.headers['Location'] is not None) - - log.info(f"Make sure 'Location' header is not redirecting to URS") - self.assertTrue('oauth/authorize' not in r.headers['Location']) - - def test_origin_request_header(self): - url = f"{APIROOT}/{METADATA_FILE}" - origin_request_value = "{0}".format(uuid1()) - headers = {"x-origin-request-id": origin_request_value } - global cookiejar - - log.info(f"Hitting {url} with x-origin-request-id={origin_request_value} Header") - r = requests.get(url, cookies=cookiejar, headers=headers, allow_redirects=False) - - log.info(f"Validating x-origin-request-id is passed back out successfully") - log.info(f"Response Headers: {r.headers}") - self.assertTrue(r.headers.get('x-origin-request-id') == origin_request_value) - - def test_range_request_works(self): - url = f"{APIROOT}/{METADATA_FILE}" - headers = {"Range": "bytes=1035-1042"} - global cookiejar - - log.info(f"Making range request ({headers}) from {url}") - r = requests.get(url, cookies=cookiejar, headers=headers) - - log.info(f'Range Request Return Code: {r.status_code} (Expect 206)') - self.assertTrue(r.status_code == 206) - - log.info(f"Range Request returned {len(r.text)} bytes of data. 
(Expect 8) ") - log.info(f"Range Data: {r.text}") - self.assertTrue(len(r.text) == 8) - - def test_approved_user_can_access_private_data(self): - url = f'{APIROOT}/PRIVATE/ACCESS/testfile' - global cookiejar - - log.info(f"Attempting to access an approved PRIVATE file: {url}") - r = requests.get(url, cookies=cookiejar) - - log.info(f"APPROVED Private File check: {r.status_code} (Expect 200)") - self.assertTrue(r.status_code == 200) - - def test_approved_user_cant_access_private_data(self): - url = f"{APIROOT}/PRIVATE/NOACCESS/testfile" - global cookiejar - - log.info(f"Attempting to access an UNapproved PRRIVATE file: {url}") - r = requests.get(url, cookies=cookiejar) - - log.info(f"UNAPPROVED Private File check: {r.status_code} (Expect 403)") - self.assertTrue(r.status_code == 403) - - def test_validating_objects_with_prefix(self): - url = f"{APIROOT}/SA/BROWSE/dir1/dir2/deepfile.txt" - global cookiejar - - log.info(f"Attempting to validate an object with a prefix works: {url}") - r = requests.get(url, cookies=cookiejar) - - log.info(f'Checking file content: {r.content} (Should say "successfully downloaded")') - self.assertTrue("file was successfully downloaded" in str(r.content)) - - log.info(f"Pre-fixed object Return Code: {r.status_code} (Expect 200)") - self.assertTrue(r.status_code == 200) - - def test_validate_custom_headers(self): - url = f"{APIROOT}/{METADATA_FILE_CH}" - header_name = 'x-rainheader1' - global cookiejar - - log.info(f"Checking custom header ({header_name}) value for {url}") - r = requests.get(url, cookies=cookiejar, allow_redirects=False) - log.info(f"Got headers {r.headers}") - - header_value = r.headers.get(header_name) - log.info(f"{header_name} had value '{header_value}' (Expect 'rainheader1 value')") - self.assertTrue(r.headers.get(header_name) is not None) - - def test_validate_locate_handles_complex_configuration_key(self): - url = f"{APIROOT}/locate?bucket_name={LOCATE_BUCKET}" - global cookiejar - - log.info(f"Attempting to get bucket map paths for {LOCATE_BUCKET} @ {url}") - r = requests.get(url, cookies=cookiejar) - - log.info(f'Paths Output Should equal {MAP_PATHS}: {r.content}') - - paths = sorted(json.loads(r.content)) - self.assertEqual(paths, MAP_PATHS) - - @staticmethod - def find_bearer_token(): - global cookiejar - for cookie in cookiejar: - if cookie.name == 'asf-urs': - # Grab the JWT payload: - cookie_b64 = cookie.value.split(".")[1] - # Fix the padding: - cookie_b64 += '=' * (4 - (len(cookie_b64) % 4)) - # Decode & Load... 
- cookie_json = json.loads(base64.b64decode(cookie_b64)) - if 'urs-access-token' in cookie_json: - return cookie_json['urs-access-token'] - return None - - def validate_bearer_token_works(self, url): - token = self.find_bearer_token() - - log.info(f"Make sure we were able to decode a token from the cookie: {token} (Expect not None)") - self.assertTrue(token is not None) - - log.info(f"Attempting to download {url} using the token as a Bearer token") - r = requests.get(url, headers={"Authorization": f"Bearer {token}"}) - - log.info(f"Bearer Token Download attempt Return Code: {r.status_code} (Expect 200)") - self.assertEqual(r.status_code, 200) - - def test_validate_bearer_token_works(self): - url = f"{APIROOT}/{METADATA_FILE}" - self.validate_bearer_token_works(url) - - def test_validate_private_file_bearer_token_works(self): - url = f'{APIROOT}/PRIVATE/ACCESS/testfile' - self.validate_bearer_token_works(url) - - -class cors_test(unittest.TestCase): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def test_cors(self): - - origin_host = "https://something.asf.alaska.edu" - - if os.getenv('USE_CORS','') == 'False': - log.info("CORS is not enabled") - self.assertTrue(True) - - url = f"{APIROOT}/{METADATA_FILE_CH}" - global cookiejar - origin_headers = {"origin": origin_host} - - r = requests.get(url, cookies=cookiejar, headers=origin_headers, allow_redirects=False) - log.info(f"Got headers {r.headers}") - - self.access_control_allow_origin_configuration_test(r, origin_host) - self.access_control_allow_creds_test(r) - self.null_origin_cors_request_header(url, cookiejar) - - def access_control_allow_origin_configuration_test(self, r, origin_host): - header_name = 'Access-Control-Allow-Origin' - - header_value = r.headers.get(header_name) - log.info(f"{header_name} had value '{origin_host}' (Expect {origin_host})") - self.assertTrue(header_value == origin_host) - - def access_control_allow_creds_test(self, r): - header_name = 'Access-Control-Allow-Credentials' - - header_value = r.headers.get(header_name) - log.info(f"{header_name} had value '{header_value}' (Expect True") - self.assertTrue(header_value == 'true') - - def null_origin_cors_request_header(self, url, cookiejar): - headers = {"origin": "null"} - - log.info(f"Hitting {url} with Origin=null Header") - r = requests.get(url, cookies=cookiejar, headers=headers, allow_redirects=False) - - log.info(f"Validating Access-Control-Allow-Origin=null is returned in {r.headers}") - self.assertTrue(r.headers.get('Access-Control-Allow-Origin') == 'null') - - -class jwt_blacklist_test(unittest.TestCase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - global STACKNAME - global cookiejar - self.cookie_jar = cookiejar - self.stack_name = STACKNAME - self.aws_lambda_client = boto3.client('lambda') - self.aws_function_name = f'{STACKNAME}-EgressLambda' - self.url = f"{APIROOT}/{METADATA_FILE}" - - def set_original_env_vars(self, env): - original_env_vars = self.aws_lambda_client.update_function_configuration(FunctionName=self.aws_function_name, - Environment=env) - log.info(f"Attempt to set environment variables back to their original state: {original_env_vars}") - time.sleep(3) - - def set_up_temp_env_vars(self, endpoint): - endpoint_dict = {"BLACKLIST_ENDPOINT": endpoint} - lambda_configuration = self.aws_lambda_client.get_function_configuration( - FunctionName=self.aws_function_name - ) - - new_env_vars = lambda_configuration["Environment"] - new_env_vars["Variables"].update(endpoint_dict) - 
- log.info(f"Temporarily updated function {self.aws_function_name}'s env variables") - env_vars_update = self.aws_lambda_client.update_function_configuration(FunctionName=self.aws_function_name, - Environment=new_env_vars) - time.sleep(3) - log.info(f"Update status: {env_vars_update}") - - return lambda_configuration - - def test_validate_invalid_jwt(self): - - try: - endpoint = os.getenv("BLACKLIST_ENDPOINT", - "https://s3-us-west-2.amazonaws.com/asf.rain.code.usw2/jwt_blacklist.json") - log.info(f"Using the endpoint: {endpoint} to test a invalid JWT with the blacklist functionality") - - lambda_configuration = self.set_up_temp_env_vars(endpoint) - - r = requests.get(self.url, cookies=self.cookie_jar, allow_redirects=False) - log.info(f"Blacklisted JWTs should result in a redirect to EDL. r.is_redirect: {r.is_redirect} (Expect True)") - self.assertTrue(r.is_redirect) - - log.info(f"Result r.headers['Location']: {r.headers['Location']}") - self.assertTrue(r.headers['Location'] is not None) - - log.info(f"Make sure 'Location' header is redirecting to URS") - self.assertTrue('oauth/authorize' in r.headers['Location']) - - except Exception as e: - log.info(e) - self.assertTrue(False) - - log.info("Reverting to original environment variables") - self.set_original_env_vars(lambda_configuration["Environment"]) - - def test_validate_valid_jwt(self): - try: - endpoint = os.getenv("VALID_JWT_BLACKLIST_ENDPOINT", - "https://s3-us-west-2.amazonaws.com/asf.rain.code.usw2/valid_jwt_blacklist_test.json") - log.info(f"Using the endpoint: {endpoint} to test a valid JWT with the blacklist functionality") - - lambda_configuration = self.set_up_temp_env_vars(endpoint) - - r = requests.get(self.url, cookies=self.cookie_jar) - self.assertTrue(r.status_code == 200) - except Exception as e: - log.info(e) - self.assertTrue(False) - - log.info("Reverting to original environment variables") - self.set_original_env_vars(lambda_configuration["Environment"]) - - -def main(): - failures = 0 - tests = 0 - - # We need the tests to run in this order. - for test in (unauthed_download_test, auth_download_test, authed_download_test, jwt_blacklist_test, cors_test): - suite = unittest.TestLoader().loadTestsFromTestCase(test) - result = unittest.TextTestRunner().run(suite) - - # Check the results - if result.failures: - # Unexpected asserts - log.info(f"Test {test().id()} had {len(result.failures)} Failures") - failures += len(result.failures) - - if result.errors: - # Malformed Tests - log.info(f"Test {test().id()} had {len(result.errors)} Errors") - failures += len(result.errors) - - tests += result.testsRun - - log.info(f"Test had {failures} failures in {tests} tests") - # Build Test File Json Object - if (failures < 1): - message = "All Tests Passed" - color = "success" - exit_code = 0 - elif (failures < 3): - message = f"{failures} of {tests} Tests Failed ⚠z" - color = "important" - exit_code = 1 - else: - message = f"{failures} of {tests} Tests Failed ☠" - color = "critical" - exit_code = 1 - - # Write out the string - testresults = json.dumps({"schemaVersion": 1, "label": "Tests", "message": message, "color": color}) - - # Required to make the file public and usable as input for the badge. - acls_and_stuff = {"CacheControl": "no-cache", "Expires": datetime(2015, 1, 1), - "ContentType": "application/json", "ACL": "public-read"} - - # Dump results to S3. 
- log.info(f"Writing test results: {testresults}") - boto3.resource('s3').Object(TEST_RESULT_BUCKET, TEST_RESULT_OBJECT).put(Body=testresults, **acls_and_stuff) - - # We need a non-zero exit code if we had any failures - sys.exit(exit_code) - - -if __name__ == '__main__': - if env_var_check(): - main() diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 00000000..3db9b37a --- /dev/null +++ b/codecov.yml @@ -0,0 +1,11 @@ +comment: false + +coverage: + status: + project: + default: + threshold: 0.1% + + patch: + default: + target: 85% diff --git a/config/bucket-map-template.yaml b/config/bucket-map-template.yaml new file mode 100644 index 00000000..0c344829 --- /dev/null +++ b/config/bucket-map-template.yaml @@ -0,0 +1,7 @@ +MAP: +# See examples +# BROWSE: +# PLATFORM-A: platform-a-bucket +# +# PUBLIC_BUCKETS: +# - platform-a-bucket diff --git a/lambda/app.py b/lambda/app.py index eb237fed..155ae7af 100644 --- a/lambda/app.py +++ b/lambda/app.py @@ -3,21 +3,34 @@ import time from urllib import request from urllib.error import HTTPError -from urllib.parse import urlparse, quote_plus, urlencode +from urllib.parse import quote_plus, urlencode, urlparse +import cachetools import flatdict from botocore.config import Config as bc_Config from botocore.exceptions import ClientError +from cachetools.func import ttl_cache +from cachetools.keys import hashkey from chalice import Chalice, Response - -from rain_api_core.aws_util import get_yaml_file, get_s3_resource, get_role_session, get_role_creds, \ - check_in_region_request -from rain_api_core.egress_util import get_presigned_url, process_request, check_private_bucket, check_public_bucket -from rain_api_core.general_util import get_log, log_context, return_timing_object, duration -from rain_api_core.urs_util import get_urs_url, do_login, user_in_group, get_urs_creds, user_profile_2_jwt_payload, \ - get_new_token_and_profile -from rain_api_core.view_util import get_html_body, get_cookie_vars, make_set_cookie_headers_jwt, get_jwt_keys, \ - JWT_COOKIE_NAME, JWT_ALGO +from rain_api_core.aws_util import check_in_region_request, get_role_creds, get_role_session, get_yaml_file +from rain_api_core.egress_util import check_private_bucket, check_public_bucket, get_presigned_url, process_request +from rain_api_core.general_util import duration, get_log, log_context, return_timing_object +from rain_api_core.urs_util import ( + do_login, + get_new_token_and_profile, + get_urs_creds, + get_urs_url, + user_in_group, + user_profile_2_jwt_payload +) +from rain_api_core.view_util import ( + JWT_ALGO, + JWT_COOKIE_NAME, + get_cookie_vars, + get_html_body, + get_jwt_keys, + make_set_cookie_headers_jwt +) log = get_log() conf_bucket = os.getenv('CONFIG_BUCKET', "rain-t-config") @@ -25,17 +38,11 @@ # Here's a lifetime-of lambda cache of these values: bucket_map_file = os.getenv('BUCKET_MAP_FILE', 'bucket_map.yaml') b_map = None -b_region_map = {} -bc_client_cache = {} -s3_resource = get_s3_resource() +# TODO(reweeden): Refactor when wrapped attributes are implemented +# https://github.com/tkem/cachetools/issues/176 +get_bucket_region_cache = cachetools.LRUCache(maxsize=128) STAGE = os.getenv('STAGE_NAME', 'DEV') -header_map = {'date': 'Date', - 'last-modified': 'Last-Modified', - 'accept-ranges': 'Accept-Ranges', - 'etag': 'ETag', - 'content-type': 'Content-Type', - 'content-length': 'Content-Length'} class TeaChalice(Chalice): @@ -73,17 +80,29 @@ def __init__(self, payload: dict): def get_request_id() -> str: + assert app.lambda_context is not None + return 
app.lambda_context.aws_request_id + +# TODO(reweeden): fix typing +# typing module causes errors in AWS lambda? def get_origin_request_id() -> str: + assert app.current_request is not None + return app.current_request.headers.get("x-origin-request-id") + def get_aux_request_headers(): - req_headers = { "x-request-id": get_request_id() } - if get_origin_request_id(): - req_headers.update( { "x-origin-request-id": get_origin_request_id() } ) + req_headers = {"x-request-id": get_request_id()} + origin_request_id = get_origin_request_id() + + if origin_request_id: + req_headers["x-origin-request-id"] = origin_request_id + return req_headers + def check_for_browser(hdrs): return 'user-agent' in hdrs and hdrs['user-agent'].lower().startswith('mozilla') @@ -97,8 +116,10 @@ def get_user_from_token(token): :return: user ID of requesting user. """ + urs_creds = get_urs_creds() + params = { - 'client_id': get_urs_creds()['UrsId'], + 'client_id': urs_creds['UrsId'], # The client_id of the non SSO application you registered with Earthdata Login 'token': token } @@ -106,7 +127,7 @@ def get_user_from_token(token): url = '{}/oauth/tokens/user?{}'.format(os.getenv('AUTH_BASE_URL', 'https://urs.earthdata.nasa.gov'), urlencode(params)) - authval = "Basic {}".format(get_urs_creds()['UrsAuth']) + authval = "Basic {}".format(urs_creds['UrsAuth']) headers = {'Authorization': authval} # Tack on auxillary headers @@ -120,14 +141,14 @@ def get_user_from_token(token): response = request.urlopen(req) except HTTPError as e: response = e - log.debug(e) + log.debug("%s", e) payload = response.read() log.info(return_timing_object(service="EDL", endpoint=url, method="POST", duration=duration(timer))) try: msg = json.loads(payload) - except json.JSONDecodeError as e: + except json.JSONDecodeError: log.error(f'could not get json message from payload: {payload}') msg = {} @@ -144,7 +165,9 @@ def get_user_from_token(token): return None elif response.code == 403: if 'error_description' in msg and 'eula' in msg['error_description'].lower(): - # sample json in this case: `{"status_code":403,"error_description":"EULA Acceptance Failure","resolution_url":"http://uat.urs.earthdata.nasa.gov/approve_app?client_id=LqWhtVpLmwaD4VqHeoN7ww"}` + # sample json in this case: + # `{"status_code": 403, "error_description": "EULA Acceptance Failure", + # "resolution_url": "http://uat.urs.earthdata.nasa.gov/approve_app?client_id=LqWhtVpLmwaD4VqHeoN7ww"}` log.warning('user needs to sign the EULA') raise EulaException(msg) # Probably an expired token if here @@ -164,8 +187,7 @@ def get_user_from_token(token): def cumulus_log_message(outcome: str, code: int, http_method: str, k_v: dict): k_v.update({'code': code, 'http_method': http_method, 'status': outcome, 'requestid': get_request_id()}) - jsonstr = json.dumps(k_v) - print(f'{jsonstr}') + print(json.dumps(k_v)) def restore_bucket_vars(): @@ -186,7 +208,11 @@ def do_auth_and_return(ctxt): here = ctxt['path'] if os.getenv('DOMAIN_NAME'): # Pop STAGE value off the request if we have a custom domain - here = '/'.join([""] + here.split('/')[2:]) if here.startswith('/{}/'.format(STAGE)) else here + # TODO(reweeden): python3.9 use `str.removeprefix` + prefix = f'/{STAGE}' + if here.startswith(prefix): + here = here[len(prefix):] + log.info("here will be {0}".format(here)) redirect_here = quote_plus(here) urs_url = get_urs_url(ctxt, redirect_here) @@ -194,23 +220,25 @@ def do_auth_and_return(ctxt): return Response(body='', status_code=302, headers={'Location': urs_url}) -def send_cors_headers(headers): 
+def add_cors_headers(headers): + assert app.current_request is not None + # send CORS headers if we're configured to use them - if 'origin' in app.current_request.headers: + origin_header = app.current_request.headers.get('origin') + if origin_header is not None: cors_origin = os.getenv("CORS_ORIGIN") - origin_header = app.current_request.headers['origin'] - if cors_origin and ( origin_header.endswith(cors_origin) or origin_header.lower() == 'null' ): - headers['Access-Control-Allow-Origin'] = app.current_request.headers['origin'] + if cors_origin and (origin_header.endswith(cors_origin) or origin_header.lower() == 'null'): + headers['Access-Control-Allow-Origin'] = origin_header headers['Access-Control-Allow-Credentials'] = 'true' else: - log.warning(f'Origin {app.current_request.headers["origin"]} is not an approved CORS host: {cors_origin}') + log.warning(f'Origin {origin_header} is not an approved CORS host: {cors_origin}') def make_redirect(to_url, headers=None, status_code=301): if headers is None: headers = {} headers['Location'] = to_url - send_cors_headers(headers) + add_cors_headers(headers) log.info(f'Redirect created. to_url: {to_url}') cumulus_log_message('success', status_code, 'GET', {'redirect': 'yes', 'redirect_URL': to_url}) log.debug(f'headers for redirect: {headers}') @@ -234,37 +262,41 @@ def get_bcconfig(user_id: str) -> dict: "read_timeout": 600, "retries": {"max_attempts": 10}} - if os.getenv('S3_SIGNATURE_VERSION'): - bcconfig['signature_version'] = os.getenv('S3_SIGNATURE_VERSION') + signature_version = os.getenv('S3_SIGNATURE_VERSION') + if signature_version: + bcconfig['signature_version'] = signature_version return bcconfig +@cachetools.cached( + get_bucket_region_cache, + # Cache by bucketname only + key=lambda _, bucketname: hashkey(bucketname) +) def get_bucket_region(session, bucketname) -> str: - # Figure out bucket region - params = {} - if bucketname in b_region_map: - return b_region_map[bucketname] - try: timer = time.time() - bucket_region = session.client('s3', **params).get_bucket_location(Bucket=bucketname)['LocationConstraint'] - bucket_region = 'us-east-1' if not bucket_region else bucket_region - log.info(return_timing_object(service="s3", endpoint=f"client().get_bucket_location({bucketname})", duration=duration(timer))) + bucket_region = session.client('s3').get_bucket_location(Bucket=bucketname)['LocationConstraint'] or 'us-east-1' + log.info(return_timing_object( + service="s3", + endpoint=f"client().get_bucket_location({bucketname})", + duration=duration(timer) + )) log.debug("bucket {0} is in region {1}".format(bucketname, bucket_region)) + + return bucket_region except ClientError as e: # We hit here if the download role cannot access a bucket, or if it doesn't exist log.error("Could not access download bucket {0}: {1}".format(bucketname, e)) raise - b_region_map[bucketname] = bucket_region - - return bucket_region - def get_user_ip(): - if app.current_request.headers.get('x-forwarded-for'): - x_forwarded_for = app.current_request.headers['x-forwarded-for'] + assert app.current_request is not None + + x_forwarded_for = app.current_request.headers.get('x-forwarded-for') + if x_forwarded_for: ip = x_forwarded_for.replace(' ', '').split(',')[0] log.debug(f"x-fowarded-for: {x_forwarded_for}") log.info(f"Assuming {ip} is the users IP") @@ -354,9 +386,10 @@ def try_download_from_bucket(bucket, filename, user_profile, headers: dict): log.debug("Presigned URL host was {0}".format(s3_host)) download_stat = {"bucket": bucket, "object": filename, 
"range": range_header} - download_stat.update({ "InRegion": "True" if is_in_region else "False"}) - if head_check.get("ContentLength"): - download_stat.update({ "size": head_check["ContentLength"]}) + download_stat.update({"InRegion": "True" if is_in_region else "False"}) + size = head_check.get("ContentLength") + if size is not None: + download_stat.update({"size": size}) log.info({"download": download_stat}) @@ -415,7 +448,6 @@ def logout(): template_vars = {'title': 'Logged Out', 'URS_URL': get_urs_url(app.current_request.context)} if JWT_COOKIE_NAME in cookievars: - template_vars['contentstring'] = 'You are logged out.' else: template_vars['contentstring'] = 'No active login found.' @@ -431,13 +463,13 @@ def logout(): @app.route('/login') def login(): try: + headers = {} aux_headers = get_aux_request_headers() status_code, template_vars, headers = do_login(app.current_request.query_params, app.current_request.context, os.getenv('COOKIE_DOMAIN', ''), aux_headers=aux_headers) except ClientError as e: - log.error(e) + log.error("%s", e) status_code = 500 - headers = {'x-request-id': app.lambda_context.aws_request_id} template_vars = { 'contentstring': 'Client Error occurred. ', 'title': 'Client Error', @@ -468,8 +500,8 @@ def locate(): return Response(body='Required "bucket_name" query paramater not specified', status_code=400, headers={'Content-Type': 'text/plain'}) - bucket_name = app.current_request.query_params.get('bucket_name', None) - bucket_map = collapse_bucket_configuration(get_yaml_file(conf_bucket,bucket_map_file)['MAP']) + bucket_name = query_params.get('bucket_name') + bucket_map = collapse_bucket_configuration(get_yaml_file(conf_bucket, bucket_map_file)['MAP']) search_map = flatdict.FlatDict(bucket_map, delimiter='/') matching_paths = [key for key, value in search_map.items() if value == bucket_name] if (len(matching_paths) > 0): @@ -498,32 +530,22 @@ def get_range_header_val(): return app.current_request.headers['range'] return None + def get_new_session_client(user_id): # Default Config - params = { "config": bc_Config(**get_bcconfig(user_id)) } + params = {"config": bc_Config(**get_bcconfig(user_id))} session = get_role_session(user_id=user_id) timer = time.time() - new_bc_client = {"client": session.client('s3', **params), "timestamp": timer} + new_bc_client = session.client('s3', **params) log.info(return_timing_object(service="s3", endpoint="session.client()", duration=duration(timer))) return new_bc_client -def bc_client_is_old(bc_client): - # refresh bc_client after 50 minutes - return (time.time() - bc_client["timestamp"]) >= (50 * 60) +# refresh bc_client after 50 minutes +@ttl_cache(ttl=50 * 60) def get_bc_config_client(user_id): - - if user_id not in bc_client_cache: - # This a new user, generate a new bc_Config client - bc_client_cache[user_id] = get_new_session_client(user_id) - - elif bc_client_is_old(bc_client_cache[user_id]): - # Replace the client if is more than 50 minutes old - log.info(f"Replacing old bc_Config_client for user {user_id}") - bc_client_cache[user_id] = get_new_session_client(user_id) - - return bc_client_cache[user_id]["client"] + return get_new_session_client(user_id) def get_data_dl_s3_client(): @@ -532,21 +554,21 @@ def get_data_dl_s3_client(): def try_download_head(bucket, filename): - t = [time.time()] #t0 + t = [time.time()] # t0 client = get_data_dl_s3_client() - t.append(time.time()) #t1 + t.append(time.time()) # t1 # Check for range request range_header = get_range_header_val() try: timer = time.time() if not range_header: - 
download = client.get_object(Bucket=bucket, Key=filename) + client.get_object(Bucket=bucket, Key=filename) else: - # Should both `client.get_object()` be `client.head_object()` ?!?!?! + # TODO: Should both `client.get_object()` be `client.head_object()` ?!?!?! log.info("Downloading range {0}".format(range_header)) - download = client.get_object(Bucket=bucket, Key=filename, Range=range_header) + client.get_object(Bucket=bucket, Key=filename, Range=range_header) log.info(return_timing_object(service="s3", endpoint="client.get_object()", duration=duration(timer))) - t.append(time.time()) #t2 + t.append(time.time()) # t2 except ClientError as e: log.warning("Could not get head for s3://{0}/{1}: {2}".format(bucket, filename, e)) # cumulus uses this log message for metrics purposes. @@ -559,29 +581,21 @@ def try_download_head(bucket, filename): {'reason': 'Could not find requested data', 's3': f'{bucket}/{filename}'}) return make_html_response(template_vars, headers, 404, 'error.html') - #WTF is happening here? - response_headers = {'Content-Type': download['ContentType']} - for header in download['ResponseMetadata']['HTTPHeaders']: - name = header_map[header] if header in header_map else header - value = download['ResponseMetadata']['HTTPHeaders'][header] if header != 'server' else 'egress' - log.debug("setting header {0} to {1}.".format(name, value)) - response_headers[name] = value - # Try Redirecting to HEAD. There should be a better way. user_id = get_jwt_field(get_cookie_vars(app.current_request.headers), 'urs-user-id') log_context(user_id=user_id) # Generate URL - t.append(time.time()) #t3 + t.append(time.time()) # t3 creds, offset = get_role_creds(user_id=user_id) url_lifespan = 3600 - offset session = get_role_session(creds=creds, user_id=user_id) - t.append(time.time()) #t4 + t.append(time.time()) # t4 bucket_region = get_bucket_region(session, bucket) - t.append(time.time()) #t5 + t.append(time.time()) # t5 presigned_url = get_presigned_url(creds, bucket, filename, bucket_region, url_lifespan, user_id, 'HEAD') - t.append(time.time()) #t6 + t.append(time.time()) # t6 s3_host = urlparse(presigned_url).netloc # Return a redirect to a HEAD @@ -604,27 +618,29 @@ def dynamic_url_head(): restore_bucket_vars() t.append(time.time()) - if 'proxy' in app.current_request.uri_params: - path, bucket, filename, _ = process_request(app.current_request.uri_params['proxy'], b_map) - t.append(time.time()) + param = app.current_request.uri_params.get('proxy') + if param is None: + return Response(body='HEAD failed', headers={}, status_code=400) - process_results = 'path: {}, bucket: {}, filename:{}'.format(path, bucket, filename) - log.debug(process_results) + path, bucket, filename, _ = process_request(param, b_map) + t.append(time.time()) - if not bucket: - template_vars = {'contentstring': 'Bucket not available', - 'title': 'Bucket not available', - 'requestid': get_request_id(), } - headers = {} - return make_html_response(template_vars, headers, 404, 'error.html') - t.append(time.time()) - log.debug('timing for dynamic_url_head()') - log.debug('ET for restore_bucket_vars(): {}s'.format(t[1] - t[0])) - log.debug('ET for process_request(): {}s'.format(t[2] - t[1])) - log.debug('ET for total: {}s'.format(t[3] - t[0])) + process_results = 'path: {}, bucket: {}, filename:{}'.format(path, bucket, filename) + log.debug(process_results) - return try_download_head(bucket, filename) - return Response(body='HEAD failed', headers={}, status_code=400) + if not bucket: + template_vars = {'contentstring': 'Bucket 
not available', + 'title': 'Bucket not available', + 'requestid': get_request_id(), } + headers = {} + return make_html_response(template_vars, headers, 404, 'error.html') + t.append(time.time()) + log.debug('timing for dynamic_url_head()') + log.debug('ET for restore_bucket_vars(): {}s'.format(t[1] - t[0])) + log.debug('ET for process_request(): {}s'.format(t[2] - t[1])) + log.debug('ET for total: {}s'.format(t[3] - t[0])) + + return try_download_head(bucket, filename) def handle_auth_bearer_header(token): @@ -640,12 +656,17 @@ def handle_auth_bearer_header(token): except EulaException as e: log.warning('user has not accepted EULA') + # TODO(reweeden): changing the response based on user agent looks like a really bad idea... if check_for_browser(app.current_request.headers): - template_vars = {'title': e.payload['error_description'], - 'status_code': 403, - 'contentstring': f'Could not fetch data because "{e.payload["error_description"]}". Please accept EULA here: {e.payload["resolution_url"]} and try again.', - 'requestid': get_request_id(), - } + template_vars = { + 'title': e.payload['error_description'], + 'status_code': 403, + 'contentstring': ( + f'Could not fetch data because "{e.payload["error_description"]}". Please accept EULA here: ' + f'{e.payload["resolution_url"]} and try again.' + ), + 'requestid': get_request_id(), + } return 'return', make_html_response(template_vars, {}, 403, 'error.html') return 'return', Response(body=e.payload, status_code=403, headers={}) @@ -699,11 +720,16 @@ def dynamic_url(): if pub_bucket: log.debug("Accessing public bucket {0}".format(path)) elif not user_profile: - if 'Authorization' in app.current_request.headers and app.current_request.headers['Authorization'].split()[ - 0].lower() == 'bearer': + authorization = app.current_request.headers.get('Authorization') + if not authorization: + return do_auth_and_return(app.current_request.context) + + method, token, *_ = authorization.split() + method = method.lower() + + if method == "bearer": # we will deal with "bearer" auth here. "Basic" auth will be handled by do_auth_and_return() - log.debug('we got an Authorization header. {}'.format(app.current_request.headers['Authorization'])) - token = app.current_request.headers['Authorization'].split()[1] + log.debug('we got an Authorization header. 
{}'.format(authorization)) action, data = handle_auth_bearer_header(token) if action == 'return': @@ -718,7 +744,6 @@ def dynamic_url(): log.debug(f"Encoding JWT_PAYLOAD: {jwt_payload}") custom_headers.update(make_set_cookie_headers_jwt(jwt_payload, '', os.getenv('COOKIE_DOMAIN', ''))) cookievars[JWT_COOKIE_NAME] = jwt_payload - else: return do_auth_and_return(app.current_request.context) diff --git a/lambda/requirements.txt b/lambda/requirements.txt deleted file mode 100644 index 7ddd99a6..00000000 --- a/lambda/requirements.txt +++ /dev/null @@ -1,14 +0,0 @@ -aws-requests-auth==0.4.3 -cfnresponse==1.1.1 -chalice==1.26.2 -cryptography==35.0.0 -flatdict==4.0.1 -jinja2==3.0.2 -jwcrypto==1.0 -netaddr==0.8.0 -pyjwt==2.3.0 -pyOpenSSL==21.0.0 # maybe not necessary -python-jose==3.3.0 -PyYAML==6.0 - -pip>=19.2 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/lambda/tea_bumper.py b/lambda/tea_bumper.py index 70161d5d..5a048dae 100644 --- a/lambda/tea_bumper.py +++ b/lambda/tea_bumper.py @@ -1,15 +1,17 @@ -import json -import boto3 import os - from datetime import datetime + +import boto3 from rain_api_core.general_util import get_log + log = get_log() client = boto3.client('lambda') TEA_LAMBDA_NAME = os.getenv('TEA_LAMBDA_NAME') + def lambda_handler(event, context): + del event log.info('teabumper!') @@ -25,7 +27,3 @@ def lambda_handler(event, context): Environment=egress_env ) log.debug(response) - - -def version(): - return json.dumps({'version_id': ''}) diff --git a/lambda/update_lambda.py b/lambda/update_lambda.py index 6b819c79..a209ce12 100644 --- a/lambda/update_lambda.py +++ b/lambda/update_lambda.py @@ -1,9 +1,10 @@ import json -import boto3 -import urllib.request import os -from netaddr import cidr_merge +import urllib.request + +import boto3 import cfnresponse +from netaddr import cidr_merge def lambda_handler(event, context): @@ -16,7 +17,7 @@ def lambda_handler(event, context): print(f"Current region in {current_region}") cidr_list = get_region_cidrs(current_region) - # Get the base policy and add IP list as a conidtion + # Get the base policy and add IP list as a condition new_policy = get_base_policy(os.getenv("prefix")) new_policy["Statement"][0]["Condition"] = {"IpAddress": {"aws:SourceIp": cidr_list}} @@ -59,67 +60,61 @@ def get_region_cidrs(current_region): item["service"] == "AMAZON" and item["region"] == current_region] # It's important to filter down the CIDR range as much as possible. Too large can cause the role creation to fail. 
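    # Note (illustrative, not part of this change): netaddr.cidr_merge collapses
    # overlapping and adjacent prefixes into the smallest equivalent set, e.g.
    #   cidr_merge(["10.40.0.0/24", "10.40.0.0/16", "10.41.0.0/16"])
    #   -> [IPNetwork('10.40.0.0/15')]
    # which is what keeps the generated policy under the role's inline-policy size limit.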
in_region_amazon_ips = [str(ip) for ip in cidr_merge(in_region_amazon_ips)] - # Add in Privagte IP Space + # Add in Private IP Space in_region_amazon_ips.append('10.0.0.0/8') - return (in_region_amazon_ips) + return in_region_amazon_ips def get_base_policy(prefix): vpcid = os.getenv('vpcid') - policy = """ - - { - "Version": "2012-10-17", - "Statement": [ - { - "Action": [ - "s3:GetObject", - "s3:ListBucket", - "s3:GetBucketLocation" - ], - "Resource": [ - """ + f'"arn:aws:s3:::{prefix}' + """*/*", - """ + f'"arn:aws:s3:::{prefix}' + """*" - ], - "Effect": "Allow" - }, - { - "Action": [ - "s3:GetObject", - "s3:ListBucket", - "s3:GetBucketLocation" - ], - "Resource": [ - """ + f'"arn:aws:s3:::{prefix}' + """*/*", - """ + f'"arn:aws:s3:::{prefix}' + """*" - ], - "Effect": "Allow", - "Condition": { - "StringEquals": { - "aws:SourceVpc": """ + f'"{vpcid}"' + """ + return { + "Version": "2012-10-17", + "Statement": [ + { + "Action": [ + "s3:GetObject", + "s3:ListBucket", + "s3:GetBucketLocation" + ], + "Resource": [ + f"arn:aws:s3:::{prefix}*/*", + f"arn:aws:s3:::{prefix}*" + ], + "Effect": "Allow" + }, + { + "Action": [ + "s3:GetObject", + "s3:ListBucket", + "s3:GetBucketLocation" + ], + "Resource": [ + f"arn:aws:s3:::{prefix}*/*", + f"arn:aws:s3:::{prefix}*" + ], + "Effect": "Allow", + "Condition": { + "StringEquals": { + "aws:SourceVpc": f"{vpcid}" + } } - } - }, - { - "Action": [ - "s3:GetObject", - "s3:ListBucket", - "s3:GetBucketLocation" - ], - "Resource": [ - """ + f'"arn:aws:s3:::{prefix}' + """*/*", - """ + f'"arn:aws:s3:::{prefix}' + """*" - ], - "Effect": "Allow", - "Condition": { - "StringLike": { - "aws:SourceVpc": "vpc-*" + }, + { + "Action": [ + "s3:GetObject", + "s3:ListBucket", + "s3:GetBucketLocation" + ], + "Resource": [ + f"arn:aws:s3:::{prefix}*/*", + f"arn:aws:s3:::{prefix}*" + ], + "Effect": "Allow", + "Condition": { + "StringLike": { + "aws:SourceVpc": "vpc-*" + } } } - } - ] -} - - """ - - return json.loads(policy) + ] + } diff --git a/rain-api-core b/rain-api-core deleted file mode 160000 index 6a7c0343..00000000 --- a/rain-api-core +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 6a7c03433a24eb00be8001b785f9496adc96eecb diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..b3ad615f --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,4 @@ +boto3==1.20.54 +pytest-cov==3.0.0 +pytest==7.0.1 +pyyaml==6.0 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..72a40d67 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +cachetools==5.0.0 +cfnresponse==1.1.2 +chalice==1.26.6 +flatdict==4.0.1 +git+https://github.com/asfadmin/rain-api-core.git@17f5e026749da048553fab825543cbf76c8a4f77 +netaddr==0.8.0 + +pip>=19.2 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/scripts/sed.py b/scripts/sed.py new file mode 100644 index 00000000..afd176aa --- /dev/null +++ b/scripts/sed.py @@ -0,0 +1,47 @@ +import argparse +import re + + +def main(pattern, replacement, path_in, path_out): + # For code simplicity, always copy the whole string to memory + with open(path_in) as f: + contents = f.read() + + new_contents = re.sub(pattern, replacement, contents) + + with open(path_out, "w") as f: + f.write(new_contents) + + +def substitute(pattern, replacement, buf_in, buf_out): + for line in buf_in: + buf_out.write(re.sub(pattern, replacement, line)) + + +def pattern(arg): + try: + return re.compile(arg, flags=re.MULTILINE) + except re.error: + raise ValueError() + + +if __name__ == "__main__": + 
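+    # Illustrative usage (hypothetical pattern and file names, not part of this change):
+    #   python scripts/sed.py '<BUILD_ID>' "$BUILD_ID" -i tea-template.yaml -o tea-build.yaml
+    # If --outfile is omitted, the input file is rewritten in place.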
parser = argparse.ArgumentParser() + parser.add_argument("pattern", help="Regular expression to search for", type=pattern) + parser.add_argument("replacement", help="Text to substitute") + parser.add_argument( + "-i", "--infile", + help="Input file to read from", + required=True + ) + parser.add_argument( + "-o", "--outfile", + help="Input file to read from", + default=None + ) + + args = parser.parse_args() + + path_in = args.infile + path_out = args.outfile or path_in + main(args.pattern, args.replacement, path_in, path_out) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..43be31fd --- /dev/null +++ b/setup.cfg @@ -0,0 +1,9 @@ +[isort] +multi_line_output = 3 +line_length = 120 +# By default the 'build' directory is skipped +skip = +skip_gitignore = true + +[flake8] +max-line-length = 120 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..07586bda --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,55 @@ +import importlib +import logging +import os +import pathlib +import sys +from typing import IO, Any +from unittest import mock + +import pytest + +# Can't import normally because 'lambda' is a reserved word +# Need to import the app module first because it will override the log level at import time +_ = importlib.import_module("lambda.app") + +RESOURCES_PATH = pathlib.Path(__file__).parent.joinpath("resources/").absolute() + +logging.getLogger().setLevel(0) + + +# Mock out calls to boto3 by replacing the module in sys.modules +real_boto3 = importlib.import_module("boto3") +mock_boto3 = mock.create_autospec(real_boto3) +sys.modules["boto3"] = mock_boto3 + + +@pytest.fixture(scope="session", autouse=True) +def aws_config(): + """Set up aws cli/boto configuration + + This makes sure we don't accidentally touch real resources. + """ + # NOTE: This will persist beyond the pytest session, + # however, the process should terminate immediately afterwards. 
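+    # These are deliberately fake values: boto3 is already replaced in sys.modules
+    # above, but anything that slips past the mock should fail to authenticate
+    # rather than reach real AWS resources.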
+ os.environ["AWS_ACCESS_KEY_ID"] = "TEST_ACCESS_KEY_ID" + os.environ["AWS_SECRET_ACCESS_KEY"] = "TEST_SECRET_ACCESS_KEY" + os.environ["AWS_SECURITY_TOKEN"] = "TEST_SECURITY_TOKEN" + os.environ["AWS_SESSION_TOKEN"] = "TEST_SESSION_TOKEN" + os.environ["AWS_DEFAULT_REGION"] = "us-east-1" + + +class Resources(): + """Helper for getting test resources""" + def open(self, file, *args, **kwargs) -> IO[Any]: + return (RESOURCES_PATH / file).open(*args, **kwargs) + + +@pytest.fixture(scope="session") +def resources(): + return Resources() + + +@pytest.fixture +def boto3(): + mock_boto3.reset_mock() + return mock_boto3 diff --git a/tests/resources/bucket_map_example.yaml b/tests/resources/bucket_map_example.yaml new file mode 100644 index 00000000..16c45bae --- /dev/null +++ b/tests/resources/bucket_map_example.yaml @@ -0,0 +1,29 @@ +MAP: + RAW: + PLATFORM-A: pa-raw + PLATFORM-B: pb-raw + DATA-TYPE-1: + PLATFORM-A: pa-dt1 + PLATFORM-B: pb-dt1 + DATA-TYPE-2: + PLATFORM-A: pa-dt2 + PLATFORM-B: pb-dt2 + DATA-TYPE-3: + PLATFORM-A: pa-dt3 + PLATFORM-B: pb-dt3 + BROWSE: + PLATFORM-A: pa-bro + PLATFORM-B: pb-bro + PRIVATE: + PLATFORM-A: pa-priv + PLATFORM-B: pb-priv + +PUBLIC_BUCKETS: + - pa-bro + - pb-bro + +PRIVATE_BUCKETS: + pa-priv: + - PRIVATE_GROUP_A + pb-priv: + - PRIVATE_GROUP_B diff --git a/tests/test_app.py b/tests/test_app.py new file mode 100644 index 00000000..27066964 --- /dev/null +++ b/tests/test_app.py @@ -0,0 +1,1190 @@ +import contextlib +import importlib +import io +from unittest import mock +from urllib.error import HTTPError + +import chalice +import pytest +import yaml +from botocore.exceptions import ClientError +from chalice.test import Client + +MODULE = "lambda.app" +# Can't import normally because 'lambda' is a reserved word +app = importlib.import_module(MODULE) + + +@pytest.fixture +def _clear_caches(): + app.get_bc_config_client.cache_clear() + app.get_bucket_region_cache.clear() + + +@pytest.fixture(scope="module") +def client(): + return Client(app.app) + + +@pytest.fixture +def lambda_context(): + with mock.patch(f"{MODULE}.app.lambda_context") as ctx: + yield ctx + + +@pytest.fixture +def current_request(lambda_context): + lambda_context.aws_request_id = "request_1234" + with mock.patch(f"{MODULE}.app.current_request") as req: + yield req + + +@pytest.fixture +def mock_get_urs_creds(): + with mock.patch(f"{MODULE}.get_urs_creds", autospec=True) as m: + m.return_value = { + "UrsId": "stringofseeminglyrandomcharacters", + "UrsAuth": "verymuchlongerstringofseeminglyrandomcharacters" + } + yield m + + +@pytest.fixture +def mock_make_html_response(): + with mock.patch(f"{MODULE}.make_html_response", autospec=True) as m: + m.side_effect = lambda _1, headers, status_code, _4: chalice.Response( + body="Mock response", + headers=headers, + status_code=status_code + ) + yield m + + +@pytest.fixture +def mock_request(): + with mock.patch(f"{MODULE}.request", autospec=True) as m: + yield m + + +def test_get_request_id(lambda_context): + lambda_context.aws_request_id = "1234" + assert app.get_request_id() == "1234" + + +def test_get_origin_request_id(current_request): + current_request.headers = {} + assert app.get_origin_request_id() is None + + current_request.headers["x-origin-request-id"] = "1234" + assert app.get_origin_request_id() == "1234" + + +def test_get_aux_request_headers(current_request): + current_request.headers = {} + assert app.get_aux_request_headers() == {"x-request-id": "request_1234"} + + current_request.headers = {"x-origin-request-id": "1234"} + assert 
app.get_aux_request_headers() == {"x-request-id": "request_1234", "x-origin-request-id": "1234"} + + +def test_check_for_browser(): + FIREFOX_UA = "Mozilla/5.0 (X11; Linux x86_64; rv:93.0) Gecko/20100101 Firefox/93.0" + CHROME_UA = "Mozilla/5.0 (X11; CrOS x86_64 13982.82.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.157 Safari/537.36 " # noqa + EDGE_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4501.0 Safari/537.36 Edg/91.0.866.0" # noqa + BINGBOT_UA = "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)" + + assert app.check_for_browser({}) is False + assert app.check_for_browser({"user-agent": FIREFOX_UA}) is True + assert app.check_for_browser({"user-agent": CHROME_UA}) is True + assert app.check_for_browser({"user-agent": EDGE_UA}) is True + assert app.check_for_browser({"user-agent": BINGBOT_UA}) is True + assert app.check_for_browser({"user-agent": "Not a valid user agent"}) is False + + +def test_get_user_from_token(mock_request, mock_get_urs_creds, current_request): + del current_request + + payload = '{"uid": "user_name"}' + mock_response = mock.Mock() + mock_response.read.return_value = payload + mock_response.code = 200 + mock_request.urlopen.return_value = mock_response + + assert app.get_user_from_token("token") == "user_name" + mock_get_urs_creds.assert_called_once() + + +def test_get_user_from_token_eula_error(mock_request, mock_get_urs_creds, current_request): + del current_request + + payload = """{ + "status_code": 403, + "error_description": "EULA Acceptance Failure", + "resolution_url": "http://uat.urs.earthdata.nasa.gov/approve_app?client_id=asdf" + } + """ + mock_request.urlopen.side_effect = HTTPError("", 403, "Forbidden", {}, io.StringIO(payload)) + + with pytest.raises(app.EulaException): + app.get_user_from_token("token") + mock_get_urs_creds.assert_called_once() + + +def test_get_user_from_token_other_error(mock_request, mock_get_urs_creds, current_request): + del current_request + + payload = """{ + "status_code": 401, + "error": "some error", + "error_description": "some error description" + } + """ + mock_request.urlopen.side_effect = HTTPError("", 401, "Bad Request", {}, io.StringIO(payload)) + + assert app.get_user_from_token("token") is None + mock_get_urs_creds.assert_called_once() + + +@pytest.mark.parametrize("code", (200, 403, 500)) +def test_get_user_from_token_json_error(mock_request, mock_get_urs_creds, current_request, code): + del current_request + + mock_request.urlopen.side_effect = HTTPError("", code, "Message", {}, io.StringIO("not valid json")) + + assert app.get_user_from_token("token") is None + mock_get_urs_creds.assert_called_once() + + +def test_cumulus_log_message(current_request): + del current_request + + strio = io.StringIO() + with contextlib.redirect_stdout(strio): + app.cumulus_log_message("outcome", 200, "GET", {"foo": "bar"}) + + assert strio.getvalue() == ( + '{"foo": "bar", "code": 200, "http_method": "GET", "status": "outcome", "requestid": "request_1234"}\n' + ) + + +@mock.patch(f"{MODULE}.get_yaml_file", autospec=True) +def test_restore_bucket_vars(mock_get_yaml_file, resources): + with resources.open("bucket_map_example.yaml") as f: + buckets = yaml.full_load(f) + + mock_get_yaml_file.return_value = buckets + app.b_map = None + + app.restore_bucket_vars() + + assert app.b_map == buckets + + +@mock.patch(f"{MODULE}.get_urs_url", autospec=True) +def test_do_auth_and_return(mock_get_urs_url, monkeypatch): + mock_get_urs_url.side_effect = 
lambda _ctx, redirect: redirect + + response = app.do_auth_and_return({"path": "/some/path"}) + assert response.body == "" + assert response.status_code == 302 + assert response.headers == {"Location": "%2Fsome%2Fpath"} + + monkeypatch.setenv("DOMAIN_NAME", "www.example.com") + + response = app.do_auth_and_return({"path": "/some/other/path"}) + assert response.body == "" + assert response.status_code == 302 + assert response.headers == {"Location": "%2Fsome%2Fother%2Fpath"} + + response = app.do_auth_and_return({"path": "/DEV/some/path"}) + assert response.body == "" + assert response.status_code == 302 + assert response.headers == {"Location": "%2Fsome%2Fpath"} + + +def test_send_cors_header(current_request, monkeypatch): + current_request.headers = {} + headers = {"foo": "bar"} + app.add_cors_headers(headers) + assert headers == {"foo": "bar"} + + monkeypatch.setenv("CORS_ORIGIN", "example.com") + + current_request.headers = {"origin": "NULL"} + headers = {"foo": "bar"} + app.add_cors_headers(headers) + assert headers == { + "foo": "bar", + "Access-Control-Allow-Origin": "NULL", + "Access-Control-Allow-Credentials": "true" + } + + current_request.headers = {"origin": "foo.example.com"} + headers = {"foo": "bar"} + app.add_cors_headers(headers) + assert headers == { + "foo": "bar", + "Access-Control-Allow-Origin": "foo.example.com", + "Access-Control-Allow-Credentials": "true" + } + + current_request.headers = {"origin": "foo.bar.com"} + headers = {"foo": "bar"} + app.add_cors_headers(headers) + assert headers == {"foo": "bar"} + + +def test_make_redirect(current_request): + current_request.headers = {} + + response = app.make_redirect("www.example.com") + assert response.body == "" + assert response.headers == {"Location": "www.example.com"} + assert response.status_code == 301 + + response = app.make_redirect("www.example.com", headers={"foo": "bar"}) + assert response.body == "" + assert response.headers == {"foo": "bar", "Location": "www.example.com"} + assert response.status_code == 301 + + +@mock.patch(f"{MODULE}.get_html_body", autospec=True) +def test_make_html_response(mock_get_html_body, monkeypatch): + mock_get_html_body.return_value = "" + + response = app.make_html_response({"foo": "bar"}, {"baz": "qux"}) + assert response.body == "" + assert response.status_code == 200 + assert response.headers == {"Content-Type": "text/html", "baz": "qux"} + mock_get_html_body.assert_called_once_with({"STAGE": "DEV", "status_code": 200, "foo": "bar"}, "root.html") + mock_get_html_body.reset_mock() + + monkeypatch.setenv("DOMAIN_NAME", "example.com") + response = app.make_html_response({}, {}, 301, "redirect.html") + assert response.body == "" + assert response.status_code == 301 + assert response.headers == {"Content-Type": "text/html"} + mock_get_html_body.assert_called_once_with({"STAGE": None, "status_code": 301}, "redirect.html") + + +def test_get_bcconfig(monkeypatch): + assert app.get_bcconfig("some_user_id") == { + "user_agent": "Thin Egress App for userid=some_user_id", + "s3": {"addressing_style": "path"}, + "connect_timeout": 600, + "read_timeout": 600, + "retries": {"max_attempts": 10} + } + + monkeypatch.setenv("S3_SIGNATURE_VERSION", "some_s3_signature") + assert app.get_bcconfig("another_user_id") == { + "user_agent": "Thin Egress App for userid=another_user_id", + "s3": {"addressing_style": "path"}, + "connect_timeout": 600, + "read_timeout": 600, + "retries": {"max_attempts": 10}, + "signature_version": "some_s3_signature" + } + + monkeypatch.setenv("S3_SIGNATURE_VERSION", 
"") + assert app.get_bcconfig("another_user_id") == { + "user_agent": "Thin Egress App for userid=another_user_id", + "s3": {"addressing_style": "path"}, + "connect_timeout": 600, + "read_timeout": 600, + "retries": {"max_attempts": 10} + } + + +def test_get_bucket_region(): + session = mock.Mock() + session.client().get_bucket_location.return_value = {"LocationConstraint": "us-west-2"} + assert app.get_bucket_region(session, "bucketname") == "us-west-2" + + session.client.side_effect = ClientError({}, "bar") + with pytest.raises(ClientError): + app.get_bucket_region(session, "bucketname2") + + +def test_get_bucket_region_cached(_clear_caches): + session = mock.Mock() + session.client().get_bucket_location.return_value = {"LocationConstraint": "us-west-2"} + assert app.get_bucket_region(session, "bucketname") == "us-west-2" + assert app.get_bucket_region(session, "bucketname") == "us-west-2" + assert app.get_bucket_region(session, "bucketname") == "us-west-2" + session.client().get_bucket_location.assert_called_once() + + +def test_get_user_ip(current_request): + current_request.headers = {"x-forwarded-for": "10.0.0.1"} + assert app.get_user_ip() == "10.0.0.1" + + current_request.headers = {"x-forwarded-for": "10. 0. 0. 1"} + assert app.get_user_ip() == "10.0.0.1" + + current_request.headers = {"x-forwarded-for": "10.0.0.1,192.168.0.1"} + assert app.get_user_ip() == "10.0.0.1" + + current_request.headers = {} + current_request.context = {"identity": {"sourceIp": "10.0.0.1"}} + assert app.get_user_ip() == "10.0.0.1" + + +@mock.patch(f"{MODULE}.check_in_region_request", autospec=True) +@mock.patch(f"{MODULE}.get_role_creds", autospec=True) +@mock.patch(f"{MODULE}.get_role_session", autospec=True) +@mock.patch(f"{MODULE}.get_presigned_url", autospec=True) +def test_try_download_from_bucket( + mock_get_presigned_url, + mock_get_role_session, + mock_get_role_creds, + mock_check_in_region_request, + current_request, + monkeypatch +): + monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1") + monkeypatch.setenv("CORS_ORIGIN", "example.com") + current_request.headers = {"origin": "example.com"} + mock_get_role_creds.return_value = (mock.Mock(), 1000) + presigned_url = "somebucket.s3.us-west-2.amazonaws.com" + mock_get_presigned_url.return_value = presigned_url + client = mock_get_role_session().client() + client.get_bucket_location.return_value = {"LocationConstraint": "us-east-1"} + client.head_object.return_value = {"ContentLength": 2048} + + response = app.try_download_from_bucket("somebucket", "somefile", None, {}) + client.head_object.assert_called_once() + assert response.body == "" + assert response.status_code == 303 + assert response.headers == { + "Location": presigned_url, + "Cache-Control": "private, max-age=2540", + "Access-Control-Allow-Origin": "example.com", + "Access-Control-Allow-Credentials": "true" + } + mock_check_in_region_request.assert_called_once() + + # Hit some of the other code paths + monkeypatch.setenv("SUPPRESS_HEAD", "1") + monkeypatch.setenv("AWS_DEFAULT_REGION", "us-west-2") + client.head_object.reset_mock() + + response = app.try_download_from_bucket("somebucket", "somefile", None, "not a dict") + client.head_object.assert_not_called() + assert response.body == "" + assert response.status_code == 303 + assert response.headers == { + "Location": presigned_url, + "Cache-Control": "private, max-age=2540", + "Access-Control-Allow-Origin": "example.com", + "Access-Control-Allow-Credentials": "true" + } + + +@mock.patch(f"{MODULE}.check_in_region_request", 
autospec=True) +@mock.patch(f"{MODULE}.get_role_creds", autospec=True) +@mock.patch(f"{MODULE}.get_role_session", autospec=True) +def test_try_download_from_bucket_client_error( + mock_get_role_session, + mock_get_role_creds, + mock_check_in_region_request, + mock_make_html_response, + current_request, + _clear_caches +): + del current_request + + mock_get_role_creds.return_value = (mock.Mock(), 1000) + mock_get_role_session().client.side_effect = ClientError({}, "bar") + + app.try_download_from_bucket("somebucket", "somefile", None, {}) + mock_make_html_response.assert_called_once_with( + { + "contentstring": "There was a problem accessing download data.", + "title": "Data Not Available", + "requestid": "request_1234", + }, + {}, + 400, + "error.html" + ) + mock_check_in_region_request.assert_called_once() + + +@mock.patch(f"{MODULE}.check_in_region_request", autospec=True) +@mock.patch(f"{MODULE}.get_role_creds", autospec=True) +@mock.patch(f"{MODULE}.get_role_session", autospec=True) +@mock.patch(f"{MODULE}.get_bc_config_client", autospec=True) +def test_try_download_from_bucket_not_found( + mock_get_bc_config_client, + mock_get_role_session, + mock_get_role_creds, + mock_check_in_region_request, + mock_make_html_response, + current_request, + monkeypatch +): + del current_request + + monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1") + mock_get_role_creds.return_value = (mock.Mock(), 1000) + mock_get_bc_config_client(None).head_object.side_effect = ClientError( + {"ResponseMetadata": {"HTTPStatusCode": 404}}, + "bar" + ) + + app.try_download_from_bucket("somebucket", "somefile", None, {}) + mock_make_html_response.assert_called_once_with( + { + "contentstring": "Could not find requested data.", + "title": "Data Not Available", + "requestid": "request_1234", + }, + {}, + 404, + "error.html" + ) + mock_get_role_creds.assert_called_once() + mock_get_role_session.assert_called_once() + mock_check_in_region_request.assert_called_once() + + +@mock.patch(f"{MODULE}.check_in_region_request", autospec=True) +@mock.patch(f"{MODULE}.get_role_creds", autospec=True) +@mock.patch(f"{MODULE}.get_role_session", autospec=True) +@mock.patch(f"{MODULE}.get_bc_config_client", autospec=True) +def test_try_download_from_bucket_invalid_range( + mock_get_bc_config_client, + mock_get_role_session, + mock_get_role_creds, + mock_check_in_region_request, + current_request, + monkeypatch +): + del current_request + + monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1") + mock_get_role_creds.return_value = (mock.Mock(), 1000) + mock_get_bc_config_client(None).head_object.side_effect = ClientError( + {"ResponseMetadata": {"HTTPStatusCode": 416}}, + "bar" + ) + + response = app.try_download_from_bucket("somebucket", "somefile", None, {}) + assert response.body == "Invalid Range" + assert response.status_code == 416 + assert response.headers == {} + mock_get_role_creds.assert_called_once() + mock_get_role_session.assert_called_once() + mock_check_in_region_request.assert_called_once() + + +@mock.patch(f"{MODULE}.JWT_COOKIE_NAME", "asf-cookie") +def test_get_jwt_field(): + assert app.get_jwt_field({"asf-cookie": {"foo": "bar"}}, "foo") == "bar" + assert app.get_jwt_field({"asf-cookie": {}}, "foo") is None + assert app.get_jwt_field({}, "foo") is None + + +@mock.patch(f"{MODULE}.get_urs_url", autospec=True) +def test_root(mock_get_urs_url, mock_make_html_response, client): + urs_url = "urs.example.com" + mock_get_urs_url.return_value = urs_url + + client.http.get("/") + + 
mock_make_html_response.assert_called_once_with( + { + "title": "Welcome", + "URS_URL": "urs.example.com" + }, + {"Content-Type": "text/html"}, + 200, + "root.html" + ) + + +@mock.patch(f"{MODULE}.get_urs_url", autospec=True) +@mock.patch(f"{MODULE}.get_cookie_vars", autospec=True) +@mock.patch(f"{MODULE}.JWT_COOKIE_NAME", "asf-cookie") +def test_root_with_login( + mock_get_cookie_vars, + mock_get_urs_url, + mock_make_html_response, + monkeypatch, + client +): + monkeypatch.setenv("MATURITY", "DEV") + mock_get_cookie_vars.return_value = { + "asf-cookie": { + "urs-user-id": "user_name" + } + } + + client.http.get("/") + + mock_get_urs_url.assert_not_called() + mock_make_html_response.assert_called_once_with( + { + "title": "Welcome", + "profile": {"urs-user-id": "user_name"} + }, + {"Content-Type": "text/html"}, + 200, + "root.html" + ) + + # There is a profile but no user id + mock_make_html_response.reset_mock() + mock_get_cookie_vars.return_value = {"asf-cookie": {"foo": "bar"}} + monkeypatch.setenv("MATURITY", "TEST") + + client.http.get("/") + + mock_get_urs_url.assert_not_called() + mock_make_html_response.assert_called_once_with( + {"title": "Welcome"}, + {"Content-Type": "text/html"}, + 200, + "root.html" + ) + + # There is no profile + mock_make_html_response.reset_mock() + mock_get_cookie_vars.return_value = {"foo": "bar"} + mock_get_urs_url.return_value = "urs_url" + + client.http.get("/") + + mock_get_urs_url.assert_called_once() + mock_make_html_response.assert_called_once_with( + {"title": "Welcome", "URS_URL": "urs_url"}, + {"Content-Type": "text/html"}, + 200, + "root.html" + ) + + +@mock.patch(f"{MODULE}.get_urs_url", autospec=True) +@mock.patch(f"{MODULE}.get_cookie_vars", autospec=True) +@mock.patch(f"{MODULE}.make_set_cookie_headers_jwt", autospec=True) +@mock.patch(f"{MODULE}.JWT_COOKIE_NAME", "asf-cookie") +def test_logout( + mock_make_set_cookie_headers_jwt, + mock_get_cookie_vars, + mock_get_urs_url, + mock_make_html_response, + client +): + mock_get_urs_url.return_value = "urs_url" + mock_make_set_cookie_headers_jwt.return_value = {} + mock_get_cookie_vars.return_value = {"asf-cookie": {}} + + client.http.get("/logout") + + mock_make_html_response.assert_called_once_with( + { + "title": "Logged Out", + "URS_URL": "urs_url", + "contentstring": "You are logged out." + }, + {"Content-Type": "text/html"}, + 200, + "root.html" + ) + + +@mock.patch(f"{MODULE}.do_login", autospec=True) +def test_login(mock_do_login, client): + mock_do_login.return_value = (301, {"foo": "bar"}, {"baz": "qux"}) + + response = client.http.get("/login") + + assert response.body == b"" + assert response.status_code == 301 + assert response.headers == { + "x-request-id": app.app.lambda_context.aws_request_id, + "baz": "qux" + } + + +@mock.patch(f"{MODULE}.do_login", autospec=True) +def test_login_error(mock_do_login, mock_make_html_response, client): + mock_do_login.side_effect = ClientError({}, "foo") + + response = client.http.get("/login") + + assert response.headers["x-request-id"] == app.app.lambda_context.aws_request_id + mock_make_html_response.assert_called_once_with( + { + "contentstring": "Client Error occurred. 
", + "title": "Client Error", + "requestid": app.app.lambda_context.aws_request_id + }, + {}, + 500, + "error.html" + ) + + +def test_version(monkeypatch, client): + response = client.http.get("/version") + + assert response.json_body == {"version_id": ""} + assert response.status_code == 200 + + monkeypatch.setenv("BUMP", "bump_version") + response = client.http.get("/version") + + assert response.json_body == {"version_id": "", "last_flush": "bump_version"} + assert response.status_code == 200 + + +@mock.patch(f"{MODULE}.get_yaml_file", autospec=True) +def test_locate(mock_get_yaml_file, resources, client): + with resources.open("bucket_map_example.yaml") as f: + mock_get_yaml_file.return_value = yaml.full_load(f) + + response = client.http.get("/locate?bucket_name=pa-dt1") + assert response.json_body == ["DATA-TYPE-1/PLATFORM-A"] + assert response.status_code == 200 + + response = client.http.get("/locate?bucket_name=nonexistent") + assert response.body == b"No route defined for nonexistent" + assert response.status_code == 404 + + +def test_locate_missing_bucket(client): + for req in ("/locate", "/locate?foo=bar"): + response = client.http.get(req) + assert response.body == b'Required "bucket_name" query paramater not specified' + assert response.status_code == 400 + assert response.headers == { + "x-request-id": app.app.lambda_context.aws_request_id, + "Content-Type": "text/plain" + } + + +def test_collapse_bucket_configuration(): + bucket_map = { + "foo": "bar", + "key1": { + "key2": { + "bucket": "bucket1" + } + }, + "bucket": { + "bucket": "bucket2" + }, + "key3": { + "bucket": { + "bucket": { + "bucket": "bucket3" + } + } + } + } + app.collapse_bucket_configuration(bucket_map) + + assert bucket_map == { + "foo": "bar", + "key1": { + "key2": "bucket1" + }, + "bucket": "bucket2", + "key3": { + "bucket": { + "bucket": "bucket3" + } + } + } + + +def test_get_range_header_val(current_request): + current_request.headers = {"Range": "v1"} + assert app.get_range_header_val() == "v1" + + current_request.headers = {"range": "v2"} + assert app.get_range_header_val() == "v2" + + current_request.headers = {"Range": "v1", "range": "v2"} + assert app.get_range_header_val() == "v1" + + current_request.headers = {} + assert app.get_range_header_val() is None + + +@mock.patch(f"{MODULE}.get_role_session", autospec=True) +def test_get_new_session_client(mock_get_role_session): + client = mock_get_role_session().client() + + assert app.get_new_session_client("user_name") == client + # Once in test setup and once during `get_new_session_client` + assert mock_get_role_session.call_count == 2 + mock_get_role_session.assert_called_with(user_id="user_name") + + +@mock.patch(f"{MODULE}.get_new_session_client", autospec=True) +def test_get_bc_config_client_cached(mock_get_new_session_client): + app.get_bc_config_client("user_name") + mock_get_new_session_client.assert_called_once_with("user_name") + app.get_bc_config_client("user_name") + mock_get_new_session_client.assert_called_once_with("user_name") + + +@mock.patch(f"{MODULE}.get_cookie_vars", autospec=True) +@mock.patch(f"{MODULE}.get_bc_config_client", autospec=True) +@mock.patch(f"{MODULE}.JWT_COOKIE_NAME", "asf-cookie") +def test_get_data_dl_s3_client(mock_get_bc_config_client, mock_get_cookie_vars): + mock_get_cookie_vars.return_value = { + "asf-cookie": { + "urs-user-id": "user_name" + } + } + + app.get_data_dl_s3_client() + mock_get_bc_config_client.assert_called_once_with("user_name") + + +@mock.patch(f"{MODULE}.get_data_dl_s3_client", 
autospec=True) +@mock.patch(f"{MODULE}.get_role_creds", autospec=True) +@mock.patch(f"{MODULE}.get_role_session", autospec=True) +@mock.patch(f"{MODULE}.get_presigned_url", autospec=True) +def test_try_download_head( + mock_get_presigned_url, + mock_get_role_session, + mock_get_role_creds, + mock_get_data_dl_s3_client, + current_request, + monkeypatch +): + monkeypatch.setenv("CORS_ORIGIN", "example.com") + current_request.headers = {"origin": "example.com"} + mock_get_role_creds.return_value = (mock.Mock(), 1000) + presigned_url = "somebucket.s3.us-west-2.amazonaws.com" + mock_get_presigned_url.return_value = presigned_url + + response = app.try_download_head("bucket", "filename") + + assert response.body == "" + assert response.status_code == 303 + assert response.headers == { + "Location": presigned_url, + "Access-Control-Allow-Origin": "example.com", + "Access-Control-Allow-Credentials": "true" + } + mock_get_data_dl_s3_client.assert_called_once() + mock_get_role_creds.assert_called_once() + mock_get_role_session.assert_called_once() + + +@mock.patch(f"{MODULE}.get_data_dl_s3_client", autospec=True) +def test_try_download_head_error( + mock_get_data_dl_s3_client, + current_request, + monkeypatch, + mock_make_html_response +): + monkeypatch.setenv("CORS_ORIGIN", "example.com") + current_request.headers = {"origin": "example.com"} + mock_get_data_dl_s3_client().get_object.side_effect = ClientError({}, "foo") + + app.try_download_head("bucket", "filename") + + mock_make_html_response.assert_called_once_with( + { + "contentstring": "File not found", + "title": "File not found", + "requestid": "request_1234" + }, + {}, + 404, + "error.html" + ) + + +@mock.patch(f"{MODULE}.get_yaml_file", autospec=True) +@mock.patch(f"{MODULE}.try_download_head", autospec=True) +def test_dynamic_url_head(mock_try_download_head, mock_get_yaml_file, resources, current_request): + mock_try_download_head.return_value = chalice.Response(body="Mock response", headers={}, status_code=200) + with resources.open("bucket_map_example.yaml") as f: + mock_get_yaml_file.return_value = yaml.full_load(f) + + current_request.uri_params = {"proxy": "DATA-TYPE-1/PLATFORM-A/OBJECT_1"} + + # Can't use the chalice test client here as it doesn't seem to understand the `{proxy+}` route + response = app.dynamic_url_head() + + mock_try_download_head.assert_called_once_with("gsfc-ngap-d-pa-dt1", "OBJECT_1") + assert response.body == "Mock response" + assert response.status_code == 200 + assert response.headers == {} + + +@mock.patch(f"{MODULE}.get_yaml_file", autospec=True) +def test_dynamic_url_head_bad_bucket(mock_get_yaml_file, mock_make_html_response, resources, current_request): + with resources.open("bucket_map_example.yaml") as f: + mock_get_yaml_file.return_value = yaml.full_load(f) + + current_request.uri_params = {"proxy": "DATA-TYPE-1/NONEXISTENT/OBJECT_1"} + + # Can't use the chalice test client here as it doesn't seem to understand the `{proxy+}` route + response = app.dynamic_url_head() + + mock_make_html_response.assert_called_once_with( + { + "contentstring": "Bucket not available", + "title": "Bucket not available", + "requestid": "request_1234" + }, + {}, + 404, + "error.html" + ) + assert response.body == "Mock response" + assert response.status_code == 404 + assert response.headers == {} + + +@mock.patch(f"{MODULE}.get_yaml_file", autospec=True) +def test_dynamic_url_head_missing_proxy(mock_get_yaml_file, current_request): + mock_get_yaml_file.return_value = {} + current_request.uri_params = {} + + # Can't use the 
chalice test client here as it doesn't seem to understand the `{proxy+}` route + response = app.dynamic_url_head() + + assert response.body == "HEAD failed" + assert response.status_code == 400 + + +@mock.patch(f"{MODULE}.get_user_from_token", autospec=True) +@mock.patch(f"{MODULE}.get_new_token_and_profile", autospec=True) +def test_handle_auth_bearer_header(mock_get_new_token_and_profile, mock_get_user_from_token, current_request): + current_request.headers = {"x-origin-request-id": "origin_request_id"} + mock_user_profile = mock.Mock() + mock_get_new_token_and_profile.return_value = mock_user_profile + mock_get_user_from_token.return_value = "user_name" + + assert app.handle_auth_bearer_header(mock.Mock()) == ("user_profile", mock_user_profile) + mock_get_new_token_and_profile.assert_called_once_with( + "user_name", + True, + aux_headers={ + "x-request-id": "request_1234", + "x-origin-request-id": "origin_request_id" + } + ) + + +@mock.patch(f"{MODULE}.get_user_from_token", autospec=True) +def test_handle_auth_bearer_header_eula_error(mock_get_user_from_token, current_request): + current_request.headers = {"x-origin-request-id": "origin_request_id"} + mock_get_user_from_token.side_effect = app.EulaException({}) + + action, response = app.handle_auth_bearer_header(mock.Mock()) + assert action == "return" + assert response.status_code == 403 + assert response.headers == {} + + +@mock.patch(f"{MODULE}.get_user_from_token", autospec=True) +def test_handle_auth_bearer_header_eula_error_browser( + mock_get_user_from_token, + mock_make_html_response, + current_request +): + current_request.headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"} + mock_get_user_from_token.side_effect = app.EulaException({ + "status_code": 403, + "error_description": "EULA Acceptance Failure", + "resolution_url": "http://resolution_url" + }) + + action, response = app.handle_auth_bearer_header(mock.Mock()) + mock_make_html_response.assert_called_once_with( + { + "title": "EULA Acceptance Failure", + "status_code": 403, + "contentstring": ( + 'Could not fetch data because "EULA Acceptance Failure". Please accept EULA here: ' + 'http://resolution_url and try again.' 
+ ), + "requestid": "request_1234", + }, + {}, + 403, + "error.html" + ) + assert action == "return" + assert response.status_code == 403 + assert response.headers == {} + + +@mock.patch(f"{MODULE}.get_user_from_token", autospec=True) +@mock.patch(f"{MODULE}.get_new_token_and_profile", autospec=True) +@mock.patch(f"{MODULE}.do_auth_and_return", autospec=True) +def test_handle_auth_bearer_header_no_profile( + mock_do_auth_and_return, + mock_get_new_token_and_profile, + mock_get_user_from_token, + current_request +): + current_request.headers = {"x-origin-request-id": "origin_request_id"} + mock_response = mock.Mock() + mock_do_auth_and_return.return_value = mock_response + mock_get_new_token_and_profile.return_value = False + mock_get_user_from_token.return_value = "user_name" + + assert app.handle_auth_bearer_header(mock.Mock()) == ("return", mock_response) + mock_get_new_token_and_profile.assert_called_once_with( + "user_name", + True, + aux_headers={ + "x-request-id": "request_1234", + "x-origin-request-id": "origin_request_id" + } + ) + mock_do_auth_and_return.assert_called_once_with(current_request.context) + + +@mock.patch(f"{MODULE}.get_user_from_token", autospec=True) +@mock.patch(f"{MODULE}.do_auth_and_return", autospec=True) +def test_handle_auth_bearer_header_no_user_id( + mock_do_auth_and_return, + mock_get_user_from_token, + current_request +): + current_request.headers = {"x-origin-request-id": "origin_request_id"} + mock_response = mock.Mock() + mock_do_auth_and_return.return_value = mock_response + mock_get_user_from_token.return_value = None + + assert app.handle_auth_bearer_header(mock.Mock()) == ("return", mock_response) + mock_do_auth_and_return.assert_called_once_with(current_request.context) + + +@mock.patch(f"{MODULE}.get_yaml_file", autospec=True) +@mock.patch(f"{MODULE}.try_download_from_bucket", autospec=True) +@mock.patch(f"{MODULE}.get_cookie_vars", autospec=True) +@mock.patch(f"{MODULE}.JWT_COOKIE_NAME", "asf-cookie") +def test_dynamic_url( + mock_get_cookie_vars, + mock_try_download_from_bucket, + mock_get_yaml_file, + resources, + current_request +): + mock_try_download_from_bucket.return_value = chalice.Response(body="Mock response", headers={}, status_code=200) + with resources.open("bucket_map_example.yaml") as f: + mock_get_yaml_file.return_value = yaml.full_load(f) + + mock_get_cookie_vars.return_value = { + "asf-cookie": { + "urs-user-id": "user_name" + } + } + current_request.uri_params = {"proxy": "DATA-TYPE-1/PLATFORM-A/OBJECT_1"} + + # Can't use the chalice test client here as it doesn't seem to understand the `{proxy+}` route + response = app.dynamic_url() + + mock_try_download_from_bucket.assert_called_once_with( + "gsfc-ngap-d-pa-dt1", + "OBJECT_1", + {"urs-user-id": "user_name"}, + {} + ) + assert response.body == "Mock response" + assert response.status_code == 200 + assert response.headers == {} + + +@mock.patch(f"{MODULE}.get_yaml_file", autospec=True) +@mock.patch(f"{MODULE}.try_download_from_bucket", autospec=True) +@mock.patch(f"{MODULE}.get_cookie_vars", autospec=True) +@mock.patch(f"{MODULE}.JWT_COOKIE_NAME", "asf-cookie") +def test_dynamic_url_public( + mock_get_cookie_vars, + mock_try_download_from_bucket, + mock_get_yaml_file, + resources, + current_request +): + mock_try_download_from_bucket.return_value = chalice.Response(body="Mock response", headers={}, status_code=200) + with resources.open("bucket_map_example.yaml") as f: + mock_get_yaml_file.return_value = yaml.full_load(f) + + mock_get_cookie_vars.return_value = {} + 
current_request.uri_params = {"proxy": "BROWSE/PLATFORM-A/OBJECT_2"} + + # Can't use the chalice test client here as it doesn't seem to understand the `{proxy+}` route + response = app.dynamic_url() + + mock_try_download_from_bucket.assert_called_once_with("gsfc-ngap-d-pa-bro", "OBJECT_2", None, {}) + assert response.body == "Mock response" + assert response.status_code == 200 + assert response.headers == {} + + +@mock.patch(f"{MODULE}.get_yaml_file", autospec=True) +@mock.patch(f"{MODULE}.try_download_from_bucket", autospec=True) +@mock.patch(f"{MODULE}.user_in_group", autospec=True) +@mock.patch(f"{MODULE}.get_cookie_vars", autospec=True) +@mock.patch(f"{MODULE}.make_set_cookie_headers_jwt", autospec=True) +@mock.patch(f"{MODULE}.JWT_COOKIE_NAME", "asf-cookie") +def test_dynamic_url_private( + mock_make_set_cookie_headers_jwt, + mock_get_cookie_vars, + mock_user_in_group, + mock_try_download_from_bucket, + mock_get_yaml_file, + resources, + current_request +): + mock_try_download_from_bucket.return_value = chalice.Response(body="Mock response", headers={}, status_code=200) + user_profile = { + "urs-user-id": "user_name", + "urs-access-token": "access_token", + "first_name": "First", + "last_name": "Last", + "email_address": "user@example.com", + "user_groups": [] + } + mock_make_set_cookie_headers_jwt.return_value = {"SET-COOKIE": "cookie"} + mock_user_in_group.return_value = (True, user_profile) + with resources.open("bucket_map_example.yaml") as f: + mock_get_yaml_file.return_value = yaml.full_load(f) + + mock_get_cookie_vars.return_value = {"asf-cookie": user_profile} + current_request.uri_params = {"proxy": "PRIVATE/PLATFORM-A/OBJECT_2"} + + # Can't use the chalice test client here as it doesn't seem to understand the `{proxy+}` route + response = app.dynamic_url() + + mock_try_download_from_bucket.assert_called_once_with( + "gsfc-ngap-d-pa-priv", + "OBJECT_2", + user_profile, + {"SET-COOKIE": "cookie"} + ) + assert response.body == "Mock response" + assert response.status_code == 200 + assert response.headers == {} + + +@mock.patch(f"{MODULE}.get_yaml_file", autospec=True) +@mock.patch(f"{MODULE}.get_cookie_vars", autospec=True) +@mock.patch(f"{MODULE}.JWT_COOKIE_NAME", "asf-cookie") +def test_dynamic_url_directory( + mock_get_cookie_vars, + mock_get_yaml_file, + mock_make_html_response, + resources, + current_request +): + with resources.open("bucket_map_example.yaml") as f: + mock_get_yaml_file.return_value = yaml.full_load(f) + + mock_get_cookie_vars.return_value = { + "asf-cookie": { + "urs-user-id": "user_name" + } + } + current_request.uri_params = {"proxy": "DATA-TYPE-1/PLATFORM-A/"} + + # Can't use the chalice test client here as it doesn't seem to understand the `{proxy+}` route + response = app.dynamic_url() + + mock_make_html_response.assert_called_once_with( + { + "contentstring": "Request does not appear to be valid.", + "title": "Request Not Serviceable", + "requestid": "request_1234" + }, + {}, + 404, + "error.html" + ) + assert response.body == "Mock response" + assert response.status_code == 404 + assert response.headers == {} + + +@mock.patch(f"{MODULE}.get_yaml_file", autospec=True) +@mock.patch(f"{MODULE}.try_download_from_bucket", autospec=True) +@mock.patch(f"{MODULE}.get_cookie_vars", autospec=True) +@mock.patch(f"{MODULE}.handle_auth_bearer_header", autospec=True) +@mock.patch(f"{MODULE}.make_set_cookie_headers_jwt", autospec=True) +@mock.patch(f"{MODULE}.JWT_COOKIE_NAME", "asf-cookie") +def test_dynamic_url_bearer_auth( + mock_make_set_cookie_headers_jwt, + 
mock_handle_auth_bearer_header, + mock_get_cookie_vars, + mock_try_download_from_bucket, + mock_get_yaml_file, + resources, + current_request +): + mock_try_download_from_bucket.return_value = chalice.Response(body="Mock response", headers={}, status_code=200) + mock_handle_auth_bearer_header.return_value = ( + "user_profile", + { + "uid": "user_name", + "first_name": "First", + "last_name": "Last", + "email_address": "user@example.com", + "user_groups": [] + } + ) + mock_make_set_cookie_headers_jwt.return_value = {"SET-COOKIE": "cookie"} + with resources.open("bucket_map_example.yaml") as f: + mock_get_yaml_file.return_value = yaml.full_load(f) + + mock_get_cookie_vars.return_value = {} + current_request.uri_params = {"proxy": "DATA-TYPE-1/PLATFORM-A/OBJECT_1"} + current_request.headers = {"Authorization": "bearer b64token"} + + # Can't use the chalice test client here as it doesn't seem to understand the `{proxy+}` route + response = app.dynamic_url() + + mock_try_download_from_bucket.assert_called_once_with( + "gsfc-ngap-d-pa-dt1", + "OBJECT_1", + { + "uid": "user_name", + "first_name": "First", + "last_name": "Last", + "email_address": "user@example.com", + "user_groups": [] + }, + {"SET-COOKIE": "cookie"} + ) + assert response.body == "Mock response" + assert response.status_code == 200 + assert response.headers == {} + + +def test_profile(client): + response = client.http.get("/profile") + + assert response.body == b"Profile not available." + assert response.status_code == 200 + + +@mock.patch(f"{MODULE}.get_jwt_keys", autospec=True) +@mock.patch(f"{MODULE}.JWT_ALGO", "THE_ALGO") +def test_pubkey(get_keys_mock, client): + get_keys_mock.return_value = {"rsa_pub_key": b"THE KEY"} + + response = client.http.get("/pubkey") + + assert response.json_body == {"rsa_pub_key": "THE KEY", "algorithm": "THE_ALGO"} + assert response.status_code == 200 + + +def test_x_origin_request_id_forwarded(client): + # Could be any endpoint, but profile is the simplest + response = client.http.get("/profile", headers={"x-origin-request-id": "x_origin_request_1234"}) + + assert response.headers["x-origin-request-id"] == "x_origin_request_1234" diff --git a/tests/test_tea_bumper.py b/tests/test_tea_bumper.py new file mode 100644 index 00000000..513f5195 --- /dev/null +++ b/tests/test_tea_bumper.py @@ -0,0 +1,38 @@ +import importlib +from unittest import mock + +import pytest + +MODULE = "lambda.tea_bumper" +tea_bumper = importlib.import_module(MODULE) + + +@pytest.fixture +def context(): + return mock.Mock(aws_request_id="request_1234") + + +@mock.patch(f"{MODULE}.datetime") +def test_lambda_handler(mock_datetime, boto3, context): + mock_datetime.utcnow.return_value = 0 + + client = boto3.client("lambda") + client.get_function_configuration.return_value = { + "Environment": { + "Variables": { + "foo": "bar" + } + } + } + + tea_bumper.lambda_handler(None, context) + + client.update_function_configuration.assert_called_once_with( + FunctionName=None, + Environment={ + "Variables": { + "foo": "bar", + "BUMP": "0, request_1234" + } + } + ) diff --git a/tests/test_update_lambda.py b/tests/test_update_lambda.py new file mode 100644 index 00000000..2fc0820b --- /dev/null +++ b/tests/test_update_lambda.py @@ -0,0 +1,144 @@ +import importlib +import io +import json +from unittest import mock + +import pytest + +MODULE = "lambda.update_lambda" +update_lambda = importlib.import_module(MODULE) + + +@pytest.fixture +def context(): + return mock.Mock(aws_request_id="request_1234") + + 
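+# The tests below drive update_lambda.lambda_handler with a mocked IAM client.
+# They assert that the inline policy is rewritten via put_role_policy, that every
+# policy returned by list_role_policies is removed with delete_role_policy, and
+# that cfnresponse.send reports SUCCESS (or FAILED on error) only when the
+# CloudFormation event carries a ResponseURL.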
+@mock.patch(f"{MODULE}.get_region_cidrs") +@mock.patch(f"{MODULE}.cfnresponse") +def test_lambda_handler(mock_cfnresponse, mock_get_region_cidrs, boto3, context, monkeypatch): + monkeypatch.setenv("iam_role_name", "role_1234") + client = boto3.client("iam") + client.list_role_policies.return_value = { + "PolicyNames": ["foo", "bar"] + } + mock_get_region_cidrs.return_value = ["10.0.0.0/8"] + event = {"ResponseURL": "https://example.com"} + + update_lambda.lambda_handler(event, context) + + client.put_role_policy.assert_called_once() + client.delete_role_policy.assert_any_call(RoleName="role_1234", PolicyName="foo") + client.delete_role_policy.assert_any_call(RoleName="role_1234", PolicyName="bar") + mock_cfnresponse.send.assert_called_once_with(event, context, mock_cfnresponse.SUCCESS, {"Data": mock.ANY}) + + +@mock.patch(f"{MODULE}.get_region_cidrs") +@mock.patch(f"{MODULE}.cfnresponse") +def test_lambda_handler_no_response(mock_cfnresponse, mock_get_region_cidrs, boto3, context, monkeypatch): + monkeypatch.setenv("iam_role_name", "role_1234") + client = boto3.client("iam") + client.list_role_policies.return_value = { + "PolicyNames": ["foo", "bar"] + } + mock_get_region_cidrs.return_value = ["10.0.0.0/8"] + event = {} + + update_lambda.lambda_handler(event, context) + + client.put_role_policy.assert_called_once() + client.delete_role_policy.assert_any_call(RoleName="role_1234", PolicyName="foo") + client.delete_role_policy.assert_any_call(RoleName="role_1234", PolicyName="bar") + mock_cfnresponse.send.assert_not_called() + + +@mock.patch(f"{MODULE}.get_region_cidrs") +@mock.patch(f"{MODULE}.cfnresponse") +def test_lambda_handler_no_policy_names(mock_cfnresponse, mock_get_region_cidrs, boto3, context): + client = boto3.client("iam") + client.list_role_policies.return_value = {} + mock_get_region_cidrs.return_value = ["10.0.0.0/8"] + event = {} + + update_lambda.lambda_handler(event, context) + + client.put_role_policy.assert_called_once() + client.delete_role_policy.assert_not_called() + mock_cfnresponse.send.assert_not_called() + + +@mock.patch(f"{MODULE}.get_region_cidrs") +@mock.patch(f"{MODULE}.cfnresponse") +def test_lambda_handler_error(mock_cfnresponse, mock_get_region_cidrs, context): + mock_get_region_cidrs.side_effect = Exception("mock exception") + event = {"ResponseURL": "https://example.com"} + + update_lambda.lambda_handler(event, context) + + mock_cfnresponse.send.assert_called_once_with(event, context, mock_cfnresponse.FAILED, {"Data": mock.ANY}) + + +@mock.patch(f"{MODULE}.get_region_cidrs") +@mock.patch(f"{MODULE}.cfnresponse") +def test_lambda_handler_error_no_response(mock_cfnresponse, mock_get_region_cidrs, context): + mock_get_region_cidrs.side_effect = Exception("mock exception") + event = {} + + update_lambda.lambda_handler(event, context) + + mock_cfnresponse.send.assert_not_called() + + +@mock.patch(f"{MODULE}.urllib.request") +def test_get_region_cidrs(mock_request): + data = json.dumps({ + "prefixes": [ + # Correct service and region + { + "ip_prefix": "10.10.0.0/24", + "service": "AMAZON", + "region": "us-east-1" + }, + # Wrong service + { + "ip_prefix": "10.20.0.0/24", + "service": "SOMETHING_ELSE", + "region": "us-east-1" + }, + # Wrong region + { + "ip_prefix": "10.30.0.0/24", + "service": "AMAZON", + "region": "us-west-2" + }, + # Two prefixes that should be merged + { + "ip_prefix": "10.40.0.0/24", + "service": "AMAZON", + "region": "us-east-1" + }, + { + "ip_prefix": "10.40.0.0/16", + "service": "AMAZON", + "region": "us-east-1" + }, + ] + }).encode() + 
mock_request.urlopen.return_value = io.BytesIO(data) + ips = update_lambda.get_region_cidrs("us-east-1") + + assert ips == [ + "10.10.0.0/24", + "10.40.0.0/16", + "10.0.0.0/8" + ] + + +def test_get_base_policy(monkeypatch): + monkeypatch.setenv("vpcid", "vpc_1234") + + policy = update_lambda.get_base_policy("prefix_1234") + assert isinstance(policy, dict) + for statement in policy["Statement"]: + for resource in statement["Resource"]: + assert resource.startswith("arn:aws:s3:::prefix_1234") diff --git a/tests_e2e/__init__.py b/tests_e2e/__init__.py new file mode 100644 index 00000000..9ef763e4 --- /dev/null +++ b/tests_e2e/__init__.py @@ -0,0 +1 @@ +# End to end integration tests diff --git a/tests_e2e/conftest.py b/tests_e2e/conftest.py new file mode 100644 index 00000000..520e9589 --- /dev/null +++ b/tests_e2e/conftest.py @@ -0,0 +1,229 @@ +import json +import logging +import os +import urllib.parse +from datetime import datetime + +import boto3 +import pytest +import requests + +logging.getLogger().setLevel(logging.DEBUG) +logging.getLogger("botocore").setLevel(logging.INFO) + + +def pytest_addoption(parser): + parser.addoption( + "--stack-name", + help="Name of the cloudformation stack", + required=True, + action="store", + ) + parser.addoption( + "--url", + help=( + "The base URL of the API to test. " + "If it is omitted then boto3 will be used to get the execute api URL" + ), + action="store", + ) + parser.addoption( + "--profile", + help="AWS profile name to use", + action="store", + ) + + def S3Object(text): + if "/" not in text: + raise ValueError("format should be 'bucket/object'") + + return text.split("/", 1) + + parser.addoption( + "--test-results", + help="S3 bucket/object to upload test results to.", + action="store", + type=S3Object, + ) + + +def pytest_report_header(config): + profile = config.getoption("--profile") or "default" + stack_name = config.getoption("--stack-name") + + return f"AWS: Profile={profile}, CloudFormation Stack={stack_name}" + + +@pytest.fixture(scope="session", autouse=True) +def _configure_from_options(aws_profile): + kwargs = {} + + if aws_profile: + kwargs["profile_name"] = aws_profile + + boto3.setup_default_session(**kwargs) + + +@pytest.fixture(scope="session") +def urs_username(): + return os.environ["URS_USERNAME"] + + +@pytest.fixture(scope="session") +def urs_password(): + return Secret(os.environ["URS_PASSWORD"]) + + +class Secret(): + def __init__(self, val): + self.val = val + + def __repr__(self): + return "'******'" + + def __str__(self): + return self.val + + +@pytest.fixture(scope="session") +def stack_name(request): + return request.config.getoption("--stack-name") + + +@pytest.fixture(scope="session") +def _api_url(request): + # Don't request this fixture directly. Use `api_url` instead. + return request.config.getoption("--url") + + +@pytest.fixture(scope="session") +def aws_profile(request): + return request.config.getoption("--profile") + + +@pytest.fixture(scope="session") +def api_url(_api_url, request): + if _api_url: + return _api_url + + api_host = request.getfixturevalue("_boto3_api_host") + return f"https://{api_host}/API/" + + +@pytest.fixture(scope="session") +def _boto3_api_host(stack_name): + # Don't request this fixture directly. Use `api_host` instead. 
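+    # Finds the deployed REST API by the "<stack_name>-EgressGateway" naming
+    # convention and builds the execute-api hostname from its id and region.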
+    client = boto3.client("apigateway")
+    rest_apis = client.get_rest_apis()
+
+    for api in rest_apis['items']:
+        if api['name'] == f"{stack_name}-EgressGateway":
+            return f"{api['id']}.execute-api.{client.meta.region_name}.amazonaws.com"
+
+    raise Exception(f"Could not find API for the given stackname {stack_name}")
+
+
+@pytest.fixture(scope="session")
+def api_host(_api_url, request):
+    if _api_url:
+        parse_result = urllib.parse.urlparse(_api_url)
+        return parse_result.hostname
+
+    return request.getfixturevalue("_boto3_api_host")
+
+
+@pytest.fixture(scope="session")
+def urls(api_url):
+    return UrlsConfig(api_url)
+
+
+class UrlsConfig():
+    _METADATA_FILE_NAME = "S1A_EW_GRDM_1SDH_20190206T190846_20190206T190951_025813_02DF0B_781A.iso.xml"
+    METADATA_FILE = f"SA/METADATA_GRD_HS/{_METADATA_FILE_NAME}"
+    METADATA_FILE_CH = f"SA/METADATA_GRD_HS_CH/{_METADATA_FILE_NAME}"
+
+    def __init__(self, base_url: str):
+        self.base_url = base_url
+
+    def join(self, *parts):
+        return urllib.parse.urljoin(self.base_url, "/".join(parts))
+
+
+@pytest.fixture(scope="module")
+def auth_cookies(urls, api_host, urs_username, urs_password):
+    url = urls.join(urls.METADATA_FILE)
+    session = requests.session()
+    request = session.get(url)
+    url_earthdata = request.url
+
+    session.get(url_earthdata, auth=requests.auth.HTTPBasicAuth(urs_username, str(urs_password)))
+    cookiejar = session.cookies
+
+    # Copy .asf.alaska.edu cookies to match API Address
+    for z in cookiejar:
+        if "asf.alaska.edu" in z.domain:
+            cookiejar.set_cookie(requests.cookies.create_cookie(
+                domain=api_host,
+                name=z.name,
+                value=z.value
+            ))
+
+    return cookiejar
+
+
+# Functions that generate the JSON report file
+def pytest_sessionstart(session):
+    session.results = {}
+
+
+@pytest.hookimpl(tryfirst=True, hookwrapper=True)
+def pytest_runtest_makereport(item, call):
+    del call
+
+    outcome = yield
+    rep = outcome.get_result()
+
+    if rep.when == "call":
+        item.session.results[item] = rep
+
+
+def pytest_sessionfinish(session, exitstatus):
+    del exitstatus
+
+    test_results_config = session.config.getoption("--test-results")
+    if not test_results_config:
+        return
+
+    s3_bucket, s3_object = test_results_config
+
+    failed_amount = sum(result.failed for result in session.results.values())
+    total_amount = len(session.results)
+
+    if failed_amount < 1:
+        message = "All Tests Passed"
+        color = "success"
+    elif failed_amount < 3:
+        message = f"{failed_amount} of {total_amount} Tests Failed ⚠"
+        color = "important"
+    else:
+        message = f"{failed_amount} of {total_amount} Tests Failed ☠"
+        color = "critical"
+
+    # Write out the string
+    testresults = json.dumps({
+        "schemaVersion": 1,
+        "label": "Tests",
+        "message": message,
+        "color": color
+    })
+
+    # Required to make the file public and usable as input for the badge.
+    put_args = {
+        "CacheControl": "no-cache",
+        "Expires": datetime(2015, 1, 1),
+        "ContentType": "application/json",
+        "ACL": "public-read"
+    }
+
+    # Dump results to S3.
+    boto3.resource('s3').Object(s3_bucket, s3_object).put(Body=testresults, **put_args)
diff --git a/tests_e2e/test_auth.py b/tests_e2e/test_auth.py
new file mode 100644
index 00000000..ded73c53
--- /dev/null
+++ b/tests_e2e/test_auth.py
@@ -0,0 +1,31 @@
+import requests
+from requests.auth import HTTPBasicAuth
+
+
+def test_auth_process(urls, api_host, urs_username, urs_password):
+    url = urls.join(urls.METADATA_FILE)
+    session = requests.session()
+
+    # Follow redirects to get the Earthdata URL. We will get access denied because
+    # we aren't passing our creds in yet.
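+    # requests follows the redirect chain by default, so resp1 is the Earthdata
+    # (URS) response, expected to be 401 until credentials are supplied below.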
+ resp1 = session.get(url) + assert resp1.status_code == 401 + + url_earthdata = resp1.url + resp2 = session.get(url_earthdata, auth=HTTPBasicAuth(urs_username, str(urs_password))) + + assert resp2.status_code == 200 + cookiejar = session.cookies + + # Copy .asf.alaska.edu cookies to match API Address + for z in cookiejar: + if "asf.alaska.edu" in z.domain: + cookiejar.set_cookie(requests.cookies.create_cookie( + domain=api_host, + name=z.name, + value=z.value + )) + + final_request = session.get(url, cookies=cookiejar) + + assert final_request.status_code == 200 diff --git a/tests_e2e/test_cors.py b/tests_e2e/test_cors.py new file mode 100644 index 00000000..67630e67 --- /dev/null +++ b/tests_e2e/test_cors.py @@ -0,0 +1,20 @@ +import requests + + +def test_cors(urls, auth_cookies): + origin_host = "https://something.asf.alaska.edu" + + url = urls.join(urls.METADATA_FILE_CH) + origin_headers = {"origin": origin_host} + + r = requests.get(url, cookies=auth_cookies, headers=origin_headers, allow_redirects=False) + headers = dict(r.headers) + + assert headers.get("Access-Control-Allow-Origin") == origin_host + assert headers.get("Access-Control-Allow-Credentials") == "true" + + headers = {"origin": "null"} + r = requests.get(url, cookies=auth_cookies, headers=headers, allow_redirects=False) + headers = dict(r.headers) + + assert headers.get("Access-Control-Allow-Origin") == "null" diff --git a/tests_e2e/test_jwt_blacklist.py b/tests_e2e/test_jwt_blacklist.py new file mode 100644 index 00000000..f035c715 --- /dev/null +++ b/tests_e2e/test_jwt_blacklist.py @@ -0,0 +1,81 @@ +import contextlib +import copy +import time + +import boto3 +import pytest +import requests + + +@pytest.fixture(scope="session") +def aws_function_name(stack_name): + return f"{stack_name}-EgressLambda" + + +@pytest.fixture +def aws_lambda_client(): + return boto3.client("lambda") + + +@pytest.fixture +def endpoint_patcher(aws_lambda_client, aws_function_name): + return EndpointPatcher(aws_lambda_client, aws_function_name) + + +# TODO(reweeden): Instead of patching the blacklist which requires that we have proper AWS credentials, +# lets just use two different accounts instead +class EndpointPatcher(): + def __init__(self, aws_lambda_client, aws_function_name): + self.aws_lambda_client = aws_lambda_client + self.aws_function_name = aws_function_name + + @contextlib.contextmanager + def __call__(self, endpoint): + endpoint_dict = {"BLACKLIST_ENDPOINT": endpoint} + lambda_configuration = self.aws_lambda_client.get_function_configuration( + FunctionName=self.aws_function_name + ) + + new_env_vars = lambda_configuration["Environment"] + old_env_vars = copy.deepcopy(new_env_vars) + new_env_vars["Variables"].update(endpoint_dict) + + self.aws_lambda_client.update_function_configuration( + FunctionName=self.aws_function_name, + Environment=new_env_vars + ) + + time.sleep(3) + + try: + yield + finally: + self.aws_lambda_client.update_function_configuration( + FunctionName=self.aws_function_name, + Environment=old_env_vars + ) + time.sleep(3) + + +def test_validate_invalid_jwt(urls, auth_cookies, endpoint_patcher): + url = urls.join(urls.METADATA_FILE) + # TODO(reweeden): config? 
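+    # Assumption: the object below blacklists the key/JWT used by the test session,
+    # so a blacklisted cookie should be bounced back through the URS authorize flow.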
+ endpoint = "https://s3-us-west-2.amazonaws.com/asf.rain.code.usw2/jwt_blacklist.json" + + with endpoint_patcher(endpoint): + r = requests.get(url, cookies=auth_cookies, allow_redirects=False) + + assert r.is_redirect is True + assert r.headers['Location'] is not None + assert 'oauth/authorize' in r.headers['Location'] + + +def test_validate_valid_jwt(urls, auth_cookies, endpoint_patcher): + url = urls.join(urls.METADATA_FILE) + # TODO(reweeden): Config? + endpoint = "https://s3-us-west-2.amazonaws.com/asf.rain.code.usw2/valid_jwt_blacklist_test.json" + + with endpoint_patcher(endpoint): + r = requests.get(url, cookies=auth_cookies) + + assert r.status_code == 200 diff --git a/tests_e2e/test_protected.py b/tests_e2e/test_protected.py new file mode 100644 index 00000000..ce8bfa19 --- /dev/null +++ b/tests_e2e/test_protected.py @@ -0,0 +1,114 @@ +import base64 +import json +from uuid import uuid1 + +import requests + +LOCATE_BUCKET = "s1-ocn-1e29d408" + + +def test_urs_auth_redirect_for_auth_downloads(urls, auth_cookies): + url = urls.join(urls.METADATA_FILE) + + r = requests.get(url, cookies=auth_cookies, allow_redirects=False) + + assert r.status_code == 303 + assert r.is_redirect is True + assert r.headers['Location'] is not None + assert 'oauth/authorize' not in r.headers['Location'] + + +def test_origin_request_header(urls, auth_cookies): + url = urls.join(urls.METADATA_FILE) + origin_request_value = str(uuid1()) + headers = {"x-origin-request-id": origin_request_value} + + r = requests.get(url, cookies=auth_cookies, headers=headers, allow_redirects=False) + + headers = dict(r.headers) + assert headers.get('x-origin-request-id') == origin_request_value + + +def test_range_request_works(urls, auth_cookies): + url = urls.join(urls.METADATA_FILE) + headers = {"Range": "bytes=1035-1042"} + + r = requests.get(url, cookies=auth_cookies, headers=headers) + + assert r.status_code == 206 + assert len(r.text) == 8 + + +def test_approved_user_can_access_private_data(urls, auth_cookies): + url = urls.join("PRIVATE", "ACCESS", "testfile") + + r = requests.get(url, cookies=auth_cookies) + + assert r.status_code == 200 + + +def test_approved_user_cant_access_private_data(urls, auth_cookies): + url = urls.join("PRIVATE", "NOACCESS", "testfile") + + r = requests.get(url, cookies=auth_cookies) + + assert r.status_code == 403 + + +def test_validating_objects_with_prefix(urls, auth_cookies): + url = urls.join("SA", "BROWSE", "dir1", "dir2", "deepfile.txt") + + r = requests.get(url, cookies=auth_cookies) + + assert "file was successfully downloaded" in str(r.content) + assert r.status_code == 200 + + +def test_validate_custom_headers(urls, auth_cookies): + url = urls.join(urls.METADATA_FILE_CH) + + r = requests.get(url, cookies=auth_cookies, allow_redirects=False) + + headers = dict(r.headers) + assert headers.get('x-rainheader1') is not None + + +def test_validate_locate_handles_complex_configuration_key(api_url, auth_cookies): + url = f"{api_url}/locate?bucket_name={LOCATE_BUCKET}" + + r = requests.get(url, cookies=auth_cookies) + + paths = sorted(json.loads(r.content)) + assert paths == ["SA/OCN", "SA/OCN_CH", "SB/OCN", "SB/OCN_CH"] + + +def find_bearer_token(auth_cookies): + for cookie in auth_cookies: + if cookie.name == 'asf-urs': + # Grab the JWT payload: + cookie_b64 = cookie.value.split(".")[1] + # Fix the padding: + cookie_b64 += '=' * (4 - (len(cookie_b64) % 4)) + # Decode & Load... 
+ cookie_json = json.loads(base64.b64decode(cookie_b64)) + if 'urs-access-token' in cookie_json: + return cookie_json['urs-access-token'] + return None + + +def validate_bearer_token_works(auth_cookies, url): + token = find_bearer_token(auth_cookies) + assert token is not None + + r = requests.get(url, headers={"Authorization": f"Bearer {token}"}) + assert r.status_code == 200 + + +def test_validate_bearer_token_works(urls, auth_cookies): + url = urls.join(urls.METADATA_FILE) + validate_bearer_token_works(auth_cookies, url) + + +def test_validate_private_file_bearer_token_works(urls, auth_cookies): + url = urls.join("PRIVATE", "ACCESS", "testfile") + validate_bearer_token_works(auth_cookies, url) diff --git a/tests_e2e/test_public.py b/tests_e2e/test_public.py new file mode 100644 index 00000000..d29c126e --- /dev/null +++ b/tests_e2e/test_public.py @@ -0,0 +1,41 @@ +# Check that public files are returned without auth +import requests + +BROWSE_FILE = "SA/BROWSE/S1A_EW_GRDM_1SDH_20190206T190846_20190206T190951_025813_02DF0B_781A.jpg" +OBJ_PREFIX_FILE = "SA/METADATA_GRD_HS_CH/browse/ALAV2A104483200-OORIRFU_000.png" + + +def test_public_images(urls): + url = urls.join(BROWSE_FILE) + r = requests.get(url) + + assert r.status_code == 200 + assert r.headers["Content-Type"] == "image/jpeg" + + +def test_check_public_obj_prefix(urls): + url = urls.join(OBJ_PREFIX_FILE) + r = requests.get(url) + + assert r.status_code == 200 + + +def test_404_on_bad_request(urls): + url = urls.join("bad", "url.ext") + r = requests.get(url) + + assert r.status_code == 404 + + +def test_bad_cookie_value_cause_URS_redirect(urls): + url = urls.join(urls.METADATA_FILE) + cookies = { + "urs_user_id": "badusername", + "urs_access_token": "blah" + } + + r = requests.get(url, cookies=cookies, allow_redirects=False) + + assert r.is_redirect is True + assert r.headers["Location"] is not None + assert "oauth/authorize" in r.headers["Location"]