-
Notifications
You must be signed in to change notification settings - Fork 0
69 lines (57 loc) · 2.08 KB
/
reddit_scraper.yml
File metadata and controls
69 lines (57 loc) · 2.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# Scheduled Reddit scraper: runs scripts/scheduler.py, timestamps the merged
# CSVs it produces, uploads them as a run artifact, and emails a link to the run.
name: Reddit Scraper

on:
  schedule:
    # NOTE: fires at minute 5 of every 2nd hour (UTC) — NOT every 5 minutes.
    - cron: "5 */2 * * *"
  workflow_dispatch: # allow manual run from the GitHub UI

jobs:
  run-scraper:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11" # quoted: bare 3.11 would need exact-string match anyway

      - name: Install dependencies
        run: |
          pip install asyncpraw pandas

      # Credentials come from repo secrets; the scraper reads them from the env.
      - name: Run scraper
        run: python scripts/scheduler.py
        env:
          REDDIT_CLIENT_ID: ${{ secrets.REDDIT_CLIENT_ID }}
          REDDIT_CLIENT_SECRET: ${{ secrets.REDDIT_CLIENT_SECRET }}

      # Copy each data_tmp/*_merged.csv to a *_merged_<timestamp>.csv sibling so
      # the artifact glob below ( *_merged_*.csv ) picks up only timestamped copies.
      - name: Create timestamped copies of merged CSVs
        run: |
          ts=$(date -u +'%Y%m%d_%H%M%S')
          for f in data_tmp/*_merged.csv; do
            [ -e "$f" ] || continue
            base=$(basename "$f" .csv)
            cp "$f" "data_tmp/${base}_${ts}.csv"
          done

      # NOTE(review): this re-runs `date`, so the artifact-name timestamp can
      # differ by a second or two from the file timestamps above — harmless for
      # uniqueness, but confirm nothing parses the two as equal.
      - name: Set artifact name with timestamp
        run: echo "ARTIFACT_NAME=reddit-scraper-data-$(date -u +'%Y%m%d_%H%M%S')" >> "$GITHUB_ENV"

      - name: Upload CSV artifacts
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.ARTIFACT_NAME }}
          path: data_tmp/*_merged_*.csv
          if-no-files-found: warn # scraper may legitimately produce no new rows
          retention-days: 3

      - name: Send results by email
        uses: dawidd6/action-send-mail@v6
        with:
          server_address: smtp.gmail.com
          server_port: 465
          secure: true
          username: ${{ secrets.SMTP_USERNAME }}
          password: ${{ secrets.SMTP_PASSWORD }}
          subject: "Reddit scraper run – CSVs ready"
          to: ra.nike28@gmail.com
          from: ${{ secrets.SMTP_USERNAME }}
          body: |
            The Reddit scraper has finished running.
            You can download the CSVs from this run here:
            ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
            Repository: ${{ github.repository }}
            Run number: ${{ github.run_number }}