Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion arclight/app/controllers/static_finding_aid_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,32 @@ class StaticFindingAidController < ApplicationController
end

# Renders the static (single-page) finding aid for params[:id].
#
# Order of operations:
#   1. Fetch the Solr document for the requested id.
#   2. If S3 holds an object at static_findaids/<id> whose "lastindexed"
#      metadata equals the document's Solr "timestamp", send that HTML
#      back inline and stop.
#   3. Otherwise, redirect to the regular document page when the helper
#      says no static finding aid link should be shown, and stop.
#   4. Otherwise build the finding aid tree for the view.
#
# NOTE(review): the S3 lookup deliberately runs before the helper check;
# presumably so pre-built copies can be served for finding aids that are
# not rendered on demand — confirm.
#
# Raises KeyError when the S3_BUCKET environment variable is not set.
def show
  @document = search_service.fetch(::RSolr.solr_escape(params[:id]))

  # The pre-built copy is keyed by document id and stamped with the Solr
  # timestamp it was generated from.
  s3_key = "static_findaids/#{@document.id}"
  last_indexed = @document["timestamp"]

  s3 = Aws::S3::Client.new(region: "us-west-2")
  bucket = ENV.fetch("S3_BUCKET")

  begin
    head = s3.head_object(bucket: bucket, key: s3_key)

    if head.metadata["lastindexed"] == last_indexed
      html_obj = s3.get_object(bucket: bucket, key: s3_key)
      send_data html_obj.body.read, type: "text/html", disposition: "inline"
      return
    end
  rescue Aws::S3::Errors::NotFound, Aws::S3::Errors::NoSuchKey
    # No pre-built copy (or it vanished between the HEAD and the GET):
    # fall through and handle the request as usual.
  end

  unless helpers.show_static_finding_aid_link?(@document)
    redirect_to solr_document_path(@document), status: 302
    # Stop here: without this return the finding aid tree below was still
    # built even though the response had already been set to a redirect.
    return
  end

  @doc_tree = Oac::FindingAidTreeNode.new(self, params[:id])
  @document = @doc_tree.document
end
end
104 changes: 104 additions & 0 deletions arclight/bin/build-static-findaids
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/bin/bash
# Fetches static finding aid HTML from the local Rails server for large collections and uploads each page to S3.

set -euo pipefail

# Default values, shared by the argument parsing and the usage text so the
# two cannot drift apart.
DEFAULT_PAGE_SIZE=5
DEFAULT_COMPONENT_COUNT_RANGE="10001 TO 15000"
# Value exported as CHILD_COMPONENT_LIMIT for the Rails server below.
BUILD_CHILD_COMPONENT_LIMIT=15000

# Usage info
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  echo "Usage: $0 [PAGE_SIZE] [COMPONENT_COUNT_RANGE]"
  echo " PAGE_SIZE: Number of results per Solr page (default: $DEFAULT_PAGE_SIZE)"
  echo " COMPONENT_COUNT_RANGE: Solr range query for total_component_count_is (default: '$DEFAULT_COMPONENT_COUNT_RANGE')"
  exit 0
fi

PAGE_SIZE="${1:-$DEFAULT_PAGE_SIZE}"
COMPONENT_COUNT_RANGE="${2:-$DEFAULT_COMPONENT_COUNT_RANGE}"

# PAGE_SIZE is the paging increment further down; zero or a non-number
# would make that loop spin forever or fail mid-run.
if ! [[ "$PAGE_SIZE" =~ ^[1-9][0-9]*$ ]]; then
  echo "PAGE_SIZE must be a positive integer, got '$PAGE_SIZE'" >&2
  exit 1
fi

# Requires jq; install it only when it is missing.
if ! command -v jq > /dev/null 2>&1; then
  echo -e "\nInstalling jq...\n"
  # Separate commands (not `a && b`) so `set -e` aborts if either one fails.
  apt-get update
  apt-get install jq -y
fi

# Requires $SOLR_URL and $S3_BUCKET to be set in the environment
SOLR_URL="${SOLR_URL:?SOLR_URL must be set}"
S3_BUCKET="${S3_BUCKET:?S3_BUCKET must be set}"

# Override CHILD_COMPONENT_LIMIT (the application default is 1700) and turn
# off the static guide cache for any Rails server started by this script.
echo -e "\nSetting CHILD_COMPONENT_LIMIT to $BUILD_CHILD_COMPONENT_LIMIT..."
export CHILD_COMPONENT_LIMIT="$BUILD_CHILD_COMPONENT_LIMIT"
export ENABLE_STATIC_GUIDE_CACHE=false

# 1. Ask Solr how many large finding aids match. Only numFound is read from
#    this response, so no documents are requested (rows=0); the documents
#    themselves are fetched page by page later on.
echo -e "\nQuerying Solr for large finding aids..."
# --fail makes an HTTP error status abort the script (via set -e) instead
# of handing an error body to jq; -S still prints the curl error.
results=$(curl -sS --fail --get "$SOLR_URL/select" \
  --data-urlencode "fq=total_component_count_is:[${COMPONENT_COUNT_RANGE}]" \
  --data-urlencode 'q.op=OR' \
  --data-urlencode 'q=level_ssim:"Collection"' \
  --data-urlencode 'rows=0')

# 2. Start the rails server (if not already running)
if ! pgrep -f "rails server" > /dev/null; then
  echo -e "\nStarting Rails server..."
  /rails/bin/docker-entrypoint ./bin/rails server > /tmp/rails-server.log 2>&1 &

  # Poll until the server answers instead of sleeping a fixed 10 seconds:
  # a slow boot would otherwise make the first fetches fail. Any HTTP
  # response (whatever its status) counts as "up".
  server_ready=false
  for (( attempt = 1; attempt <= 60; attempt++ )); do
    if curl -s -o /dev/null "http://0.0.0.0:3000/"; then
      server_ready=true
      break
    fi
    sleep 1
  done

  if [[ "$server_ready" != true ]]; then
    echo "Rails server did not respond on port 3000 after 60 attempts" >&2
    cat /tmp/rails-server.log >&2 || true
    exit 1
  fi
fi

# 3. Determine the application version: the contents of CINCO_VERSION.txt
#    when that file exists in the working directory, "unknown" otherwise.
VERSION="unknown"
if [[ -f CINCO_VERSION.txt ]]; then
  VERSION=$(cat CINCO_VERSION.txt)
fi

# 4. For each ark/last-indexed-date in our solr search results set
NUM_FOUND=$(echo "$results" | jq -r '.response.numFound')
# Guard against an error/unexpected response: a non-numeric value here
# would break the arithmetic loop condition below.
if ! [[ "$NUM_FOUND" =~ ^[0-9]+$ ]]; then
  echo "Unexpected Solr response: numFound is '$NUM_FOUND'" >&2
  exit 1
fi
echo -e "\nProcessing Solr results... ($NUM_FOUND total results, $PAGE_SIZE per page)"

STATIC_HTML=/tmp/static_findaid.html
START=0

while (( START < NUM_FOUND )); do
  echo -e "\n===== Fetching Solr results page starting at $START... =====\n"
  page_results=$(curl -sS --fail --get "$SOLR_URL/select" \
    --data-urlencode "fq=total_component_count_is:[${COMPONENT_COUNT_RANGE}]" \
    --data-urlencode 'indent=true' \
    --data-urlencode 'q.op=OR' \
    --data-urlencode 'q=level_ssim:"Collection"' \
    --data-urlencode 'sort=total_component_count_is desc' \
    --data-urlencode "rows=$PAGE_SIZE" \
    --data-urlencode "start=$START" \
    --data-urlencode 'fl=total_component_count_is,id,timestamp')

  echo "$page_results" | jq -c '.response.docs[]' | while read -r doc; do
    # `// empty` turns a missing id into an empty string; plain `.id`
    # would print the literal text "null" and slip past the -z check.
    ARK=$(echo "$doc" | jq -r '.id // empty')
    LAST_INDEXED_DATE=$(echo "$doc" | jq -r '.timestamp // empty')
    if [[ -z "$ARK" ]]; then continue; fi

    echo "Fetching static HTML for $ARK..."
    ENCODED_ARK=$(printf '%s' "$ARK" | jq -sRr @uri)
    # Capture the HTTP status: a redirect or error response still has a
    # non-empty body, which must not be uploaded as the finding aid.
    HTTP_STATUS=$(curl -s -o "$STATIC_HTML" -w '%{http_code}' \
      "http://0.0.0.0:3000/findaid/static/$ENCODED_ARK")

    if [[ "$HTTP_STATUS" != "200" || ! -s "$STATIC_HTML" ]]; then
      echo -e "\033[31mFailed to fetch static HTML for $ARK (HTTP $HTTP_STATUS), skipping upload.\033[0m\n"
      rm -f "$STATIC_HTML"
      continue
    fi

    echo -e "Uploading to S3: $S3_BUCKET/static_findaids/$ARK"
    aws s3 cp "$STATIC_HTML" "s3://$S3_BUCKET/static_findaids/$ARK" \
      --metadata "ArclightVersion=$VERSION,LastIndexed=$LAST_INDEXED_DATE"

    rm -f "$STATIC_HTML"

    # Throttle requests to avoid overloading Solr
    sleep 1
  done

  START=$(( START + PAGE_SIZE ))
done

echo -e "\n\n===== Rails server log output ====="
# The log only exists when this script started the server; a bare `cat` on
# a missing file would abort the script (set -e) before "Done.".
if [[ -f /tmp/rails-server.log ]]; then
  cat /tmp/rails-server.log
else
  echo "(no log file found at /tmp/rails-server.log)"
fi
echo "Done."
45 changes: 45 additions & 0 deletions arclight/bin/build-static-findaids-sketch.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@


# 1. Query for a list of big finding aids using curl

We have a $SOLR_URL in the container, already defined.

Here's the query:
```
$SOLR_URL/select?
fq=total_component_count_is%3A[4501%20TO%20*]&
indent=true&
q.op=OR&
q=level_ssim%3A%22Collection%22&
sort=total_component_count_is%20desc&
rows=300&
fl=total_component_count_is,id
```

- add last indexed date to returned fields for use in s3 metadata

```curl $SOLR_URL...```

# 2. Start the rails server

It's not currently running, since we issued a command override to start
this container, so:

```/rails/bin/docker-entrypoint ./bin/rails server &```

# 3. Get the currently running application version

```VERSION=$(cat CINCO_VERSION.txt)```

# 4. For each ark/last-indexed-date in our solr search results set

- get the last indexed date
- get the ark

```
curl http://0.0.0.0:3000/findaid/static/$ARK -o /tmp/static.html
aws s3 cp /tmp/static.html s3://$S3_BUCKET/static_findaids/$ARK --metadata ArclightVersion=$VERSION,LastIndexed=$LAST_INDEXED_DATE
```

- throttle requests so we don't overload solr
- stash in $S3_BUCKET/static_findaids/ (/static/ is Django's static files! don't overwrite!)
9 changes: 8 additions & 1 deletion arclight/config/initializers/static_finding_aid.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,16 @@
# too many to display a static finding aid.
#
#
# Child component limit: taken from CHILD_COMPONENT_LIMIT when set,
# otherwise 1700.
# NOTE(review): the value is a String when it comes from the environment
# and an Integer when the default applies — confirm that readers of this
# setting handle both.
Rails.application.config.child_component_limit = ENV.fetch("CHILD_COMPONENT_LIMIT", 1700)

# Static guide caching is on unless ENABLE_STATIC_GUIDE_CACHE is exactly
# the string "false"; unset or any other value leaves it enabled.
Rails.application.config.enable_static_guide_cache = ENV["ENABLE_STATIC_GUIDE_CACHE"] != "false"

# ARK identifiers stored under the `disallowed_static_guides` setting.
# Presumably these finding aids are never offered as static guides —
# confirm against the code that reads this setting.
Rails.application.config.disallowed_static_guides = [
"ark:/13030/c8tt4pp0"
]
6 changes: 5 additions & 1 deletion arclight/lib/oac/finding_aid_tree_node.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@ def initialize(controller, id, has_children: true)
end

# Returns the result of _get_children for this node.
#
# When the enable_static_guide_cache setting is truthy the result is
# stored in and served from Rails.cache under "<document id>/children";
# otherwise _get_children is called directly on every invocation.
def children
  # The flag is set on Rails.application.config by the initializer;
  # `Rails.config` is not a Rails method and raises NoMethodError.
  return _get_children unless Rails.application.config.enable_static_guide_cache

  Rails.cache.fetch("#{@document.id}/children") { _get_children }
end
Expand Down
1 change: 1 addition & 0 deletions infrastructure/cinco/config/prd/arclight/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ sceptre_user_data:
- TAG=`git describe --tags --abbrev=0`
- REPO="$AWS_ACCOUNT_ID.dkr.ecr.us-west-2.amazonaws.com"
- NEW_IMAGE="$REPO/cinco-arclight:$TAG"
- cp ./VERSION.txt arclight/CINCO_VERSION.txt
- aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin $REPO
- docker build -t cinco-arclight:$TAG arclight --file arclight/Dockerfile
- docker tag cinco-arclight:$TAG $NEW_IMAGE
Expand Down
1 change: 1 addition & 0 deletions infrastructure/cinco/config/prd/cincoctrl/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ sceptre_user_data:
- TAG=`git describe --tags --abbrev=0`
- REPO="$AWS_ACCOUNT_ID.dkr.ecr.us-west-2.amazonaws.com"
- NEW_IMAGE="$REPO/cinco-ctrl:$TAG"
- cp ./VERSION.txt cincoctrl/CINCO_VERSION.txt
- aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin $REPO
- docker build -t cinco-ctrl:$TAG cincoctrl --file cincoctrl/compose/production/django/Dockerfile
- docker tag cinco-ctrl:$TAG $NEW_IMAGE
Expand Down
1 change: 1 addition & 0 deletions infrastructure/cinco/config/stage/arclight/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ sceptre_user_data:
phases:
build:
commands:
- git rev-parse --short HEAD > arclight/CINCO_VERSION.txt
- aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin ${AWS::AccountId}.dkr.ecr.us-west-2.amazonaws.com
- docker build -t cinco-arclight arclight --file arclight/Dockerfile
- docker tag cinco-arclight:latest ${AWS::AccountId}.dkr.ecr.us-west-2.amazonaws.com/cinco-arclight:latest
Expand Down
1 change: 1 addition & 0 deletions infrastructure/cinco/config/stage/cincoctrl/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ sceptre_user_data:
python: 3.12
build:
commands:
- git rev-parse --short HEAD > cincoctrl/CINCO_VERSION.txt
- aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin ${AWS::AccountId}.dkr.ecr.us-west-2.amazonaws.com
- docker build -t cinco-ctrl cincoctrl --file cincoctrl/compose/production/django/Dockerfile
- docker tag cinco-ctrl:latest ${AWS::AccountId}.dkr.ecr.us-west-2.amazonaws.com/cinco-ctrl:latest
Expand Down
Loading