# Fetch JTWC Products (run #84)
#
# Workflow file for this run

name: Fetch JTWC Products

# Scrapes the JTWC main page for product .txt links, downloads them into the
# root of the pgtw branch together with the raw page HTML, and pushes a commit
# whenever anything changed.

# Triggers: run automatically every 10 minutes, and allow manual runs.
on:
  schedule:
    - cron: '*/10 * * * *'  # every 10 minutes
  workflow_dispatch:  # allow manual runs

# The job pushes commits to the pgtw branch, so the token needs write access
# to the repository contents.
permissions:
  contents: write

# Serialize runs so that overlapping scheduled runs cannot race on the push.
concurrency:
  group: fetch-jtwc-products
  cancel-in-progress: false

jobs:
  fetch-and-commit:
    runs-on: ubuntu-latest
    steps:
      # Check out the repository.
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # fetch the full history of all branches

      # Set the commit identity up front, before any Git command that may
      # need to create a commit.
      - name: Configure Git
        run: |
          git config --global user.name "GitHub Actions"
          git config --global user.email "actions@github.com"

      # Switch to the pgtw branch, creating it if it does not exist yet.
      - name: Switch to pgtw branch or create it
        run: |
          set -euo pipefail
          # Query the exact ref so that a branch that merely contains "pgtw"
          # in its name is not mistaken for it.
          if git ls-remote --exit-code --heads origin refs/heads/pgtw > /dev/null; then
            echo "Remote branch pgtw exists"
            # Refresh the remote-tracking ref, then create the local branch
            # (or reset an existing one) at the remote tip.
            git fetch origin '+refs/heads/pgtw:refs/remotes/origin/pgtw'
            git checkout -B pgtw origin/pgtw
          else
            echo "Branch pgtw does not exist remotely, creating new local branch"
            git checkout --orphan pgtw  # new branch without any history
            git rm -rf .  # clear the working tree to start from scratch
          fi

      # Scrape every product .txt link from the JTWC main page and download it.
      - name: Fetch all JTWC product txt files
        run: |
          set -euo pipefail
          SITE_ORIGIN="https://www.metoc.navy.mil"
          BASE_URL="${SITE_ORIGIN}/jtwc"
          PAGE_URL="${BASE_URL}/jtwc.html"
          # --fail makes HTTP errors (403/404/5xx) count as failures, so an
          # error page is never stored as if it were product data.
          CURL_OPTS=(
            --fail --silent --show-error --location
            --retry 3 --connect-timeout 20 --max-time 120
          )
          # Download into a scratch directory first; a file in the working
          # tree is only replaced after its download succeeded.
          TMP_DIR="$(mktemp -d)"
          trap 'rm -rf "$TMP_DIR"' EXIT

          echo "=== Fetching JTWC main page ==="
          if ! curl "${CURL_OPTS[@]}" -o "$TMP_DIR/page" "$PAGE_URL"; then
            echo "::error::Failed to fetch $PAGE_URL"
            exit 1
          fi
          # Keep the raw HTML in the repository for troubleshooting.
          mv -f "$TMP_DIR/page" jtwc_page.html
          echo "=== Saved raw HTML to jtwc_page.html ==="
          echo "Page size: $(wc -c < jtwc_page.html) bytes"

          # Extract links, matching both absolute and relative paths:
          #   1) absolute: https://www.metoc.navy.mil/jtwc/products/xxx.txt
          #   2) relative: products/xxx.txt or ./products/xxx.txt
          # "|| true": grep exits non-zero when nothing matches, which must
          # not abort the script under "set -e -o pipefail".
          LINKS=$(grep -oP '(?:https://www\.metoc\.navy\.mil/jtwc/products/|(?<=["\x27])(?:\.?/?products/))[^"'\''<>\s]*\.txt' jtwc_page.html | sort -u || true)
          # If nothing was found, retry with a broader pattern.
          if [ -z "$LINKS" ]; then
            echo "=== First pattern found nothing, trying broader match ==="
            LINKS=$(grep -oiP '[^"'\''<>\s]*products/[^"'\''<>\s]*\.txt' jtwc_page.html | sort -u || true)
          fi
          if [ -z "$LINKS" ]; then
            echo "No .txt links found on the page."
            echo "=== Page content preview (first 200 lines) ==="
            head -200 jtwc_page.html
            exit 0
          fi
          echo "=== Found the following .txt links ==="
          echo "$LINKS"
          echo ""

          # Download each file into the repository root. Reading line by line
          # avoids the word splitting and glob expansion of "for x in $LINKS".
          DOWNLOAD_COUNT=0
          while IFS= read -r link; do
            [ -n "$link" ] || continue
            # Resolve the link against the page URL the way a browser would.
            case "$link" in
              http://*|https://*) url="$link" ;;           # already absolute
              //*) url="https:${link}" ;;                  # protocol-relative
              /*) url="${SITE_ORIGIN}${link}" ;;           # root-relative
              *) url="${BASE_URL}/${link#./}" ;;           # page-relative
            esac
            # Derive the local file name from the URL.
            FILENAME=$(basename "$url")
            echo "Downloading: $url -> $FILENAME"
            if curl "${CURL_OPTS[@]}" -o "$TMP_DIR/download" "$url"; then
              mv -f "$TMP_DIR/download" "./$FILENAME"
              DOWNLOAD_COUNT=$((DOWNLOAD_COUNT + 1))
            else
              # Best effort: keep any previously committed copy and move on.
              echo "  Failed to download: $url"
            fi
          done <<< "$LINKS"
          echo ""
          echo "=== Download complete: $DOWNLOAD_COUNT files ==="
          ls -la -- *.txt || echo "No .txt files in current directory"

      # Commit and push the changes, if any.
      - name: Commit and push changes
        run: |
          set -euo pipefail
          # Expand the globs in the shell with nullglob: an unmatched pattern
          # then simply disappears instead of making "git add" abort and
          # stage nothing at all.
          shopt -s nullglob
          FILES=(*.txt *.html)
          if [ "${#FILES[@]}" -gt 0 ]; then
            git add -- "${FILES[@]}"
          fi
          # Only commit when something is actually staged.
          if git diff --cached --quiet; then
            echo "No changes to commit"
          else
            FILE_COUNT=$(git diff --cached --name-only | wc -l | tr -d ' ')
            git commit -m "Update JTWC products ($FILE_COUNT files): $(date -u +'%Y-%m-%d %H:%M:%S UTC')"
            git push origin pgtw
          fi