diff --git a/.github/workflows/run-yara-forge.yml b/.github/workflows/run-yara-forge.yml index 7ce67bf..e18b4f4 100644 --- a/.github/workflows/run-yara-forge.yml +++ b/.github/workflows/run-yara-forge.yml @@ -47,3 +47,27 @@ jobs: - name: Run YARA-Forge run: | python yara-forge.py + + - name: Upload build statistics + if: always() + uses: actions/upload-artifact@v4 + with: + name: build-statistics + path: build_stats.md + retention-days: 30 + + - name: Upload build log + if: always() + uses: actions/upload-artifact@v4 + with: + name: build-log + path: yara-forge.log + retention-days: 30 + + - name: Upload rule issues + if: always() + uses: actions/upload-artifact@v4 + with: + name: rule-issues + path: yara-forge-rule-issues.yml + retention-days: 30 diff --git a/.github/workflows/weekly-release.yml b/.github/workflows/weekly-release.yml index bc5d63d..6c18480 100644 --- a/.github/workflows/weekly-release.yml +++ b/.github/workflows/weekly-release.yml @@ -43,6 +43,30 @@ jobs: run: | python yara-forge.py + - name: Upload build statistics + if: always() + uses: actions/upload-artifact@v4 + with: + name: build-statistics + path: build_stats.md + retention-days: 90 + + - name: Upload build log + if: always() + uses: actions/upload-artifact@v4 + with: + name: build-log + path: yara-forge.log + retention-days: 90 + + - name: Upload rule issues + if: always() + uses: actions/upload-artifact@v4 + with: + name: rule-issues + path: yara-forge-rule-issues.yml + retention-days: 90 + - name: Get current date run: echo "CURRENT_DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV shell: bash diff --git a/main/rule_collector.py b/main/rule_collector.py index b141510..bffa50e 100644 --- a/main/rule_collector.py +++ b/main/rule_collector.py @@ -5,6 +5,7 @@ import shutil import datetime import logging +from urllib.parse import unquote #from pprint import pprint import plyara from git import Repo @@ -89,12 +90,20 @@ def retrieve_yara_rule_sets(repo_staging_dir, yara_repos): # If a sub-path is configured, restrict checkout to that path to skip large folders if 'path' in repo: repo_obj.git.sparse_checkout('init', '--cone') - repo_obj.git.sparse_checkout('set', repo['path']) + # URL-decode the path before using it with git sparse-checkout + decoded_path = unquote(repo['path']) + repo_obj.git.sparse_checkout('set', decoded_path) repo['commit_hash'] = repo_obj.head.commit.hexsha else: - # Get the latest commit hash + # Repository already cloned - reuse it repo_folder = os.path.join(repo_staging_dir, repo['owner'], repo['repo']) - repo['commit_hash'] = Repo(repo_folder).head.commit.hexsha + repo_obj = Repo(repo_folder) + repo['commit_hash'] = repo_obj.head.commit.hexsha + # If this repo config has a path, add it to sparse checkout + # (needed when multiple configs share the same git URL) + if 'path' in repo: + decoded_path = unquote(repo['path']) + repo_obj.git.sparse_checkout('add', decoded_path) # Walk through the extracted folders and find a LICENSE file # and save it into the repository object @@ -121,7 +130,9 @@ def retrieve_yara_rule_sets(repo_staging_dir, yara_repos): # Walk a sub folder if one is set in the config walk_folder = repo_folder if 'path' in repo: - walk_folder = os.path.join(repo_folder, repo['path']) + # URL-decode the path before using it + decoded_path = unquote(repo['path']) + walk_folder = os.path.join(repo_folder, decoded_path) # Print the processed folder logging.debug("Processing folder: %s", walk_folder) diff --git a/yara-forge-config.yml b/yara-forge-config.yml index 733c97e..1b84948 100644 --- a/yara-forge-config.yml +++ b/yara-forge-config.yml @@ -124,7 +124,7 @@ yara_repositories: author: "BlackBerry Threat Research Team" quality: 85 branch: "master" - path: "BlackBerry" + path: "Blackberry" - name: "Cluster25" url: "https://github.com/mikesxrs/Open-Source-YARA-rules" author: "Cluster25"